1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "jsstr.h"
8 
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
17 
18 #include <ctype.h>
19 #include <string.h>
20 
21 #include "jsapi.h"
22 #include "jsarray.h"
23 #include "jsatom.h"
24 #include "jsbool.h"
25 #include "jscntxt.h"
26 #include "jsgc.h"
27 #include "jsnum.h"
28 #include "jsobj.h"
29 #include "jsopcode.h"
30 #include "jstypes.h"
31 #include "jsutil.h"
32 
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
35 #include "jit/InlinableNatives.h"
36 #include "js/Conversions.h"
37 #if ENABLE_INTL_API
38 #include "unicode/unorm.h"
39 #endif
40 #include "vm/GlobalObject.h"
41 #include "vm/Interpreter.h"
42 #include "vm/Opcodes.h"
43 #include "vm/Printer.h"
44 #include "vm/RegExpObject.h"
45 #include "vm/RegExpStatics.h"
46 #include "vm/ScopeObject.h"
47 #include "vm/StringBuffer.h"
48 
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
52 #include "vm/TypeInference-inl.h"
53 
54 using namespace js;
55 using namespace js::gc;
56 using namespace js::unicode;
57 
58 using JS::Symbol;
59 using JS::SymbolCode;
60 using JS::ToInt32;
61 using JS::ToUint32;
62 
63 using mozilla::AssertedCast;
64 using mozilla::CheckedInt;
65 using mozilla::IsNaN;
66 using mozilla::IsNegativeZero;
67 using mozilla::IsSame;
68 using mozilla::Move;
69 using mozilla::PodCopy;
70 using mozilla::PodEqual;
71 using mozilla::RangedPtr;
72 using mozilla::UniquePtr;
73 
74 using JS::AutoCheckCannotGC;
75 
76 static JSLinearString*
ArgToRootedString(JSContext * cx,const CallArgs & args,unsigned argno)77 ArgToRootedString(JSContext* cx, const CallArgs& args, unsigned argno)
78 {
79     if (argno >= args.length())
80         return cx->names().undefined;
81 
82     JSString* str = ToString<CanGC>(cx, args[argno]);
83     if (!str)
84         return nullptr;
85 
86     args[argno].setString(str);
87     return str->ensureLinear(cx);
88 }
89 
90 /*
91  * Forward declarations for URI encode/decode and helper routines
92  */
93 static bool
94 str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
95 
96 static bool
97 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
98 
99 static bool
100 str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
101 
102 static bool
103 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
104 
105 /*
106  * Global string methods
107  */
108 
109 
110 /* ES5 B.2.1 */
111 template <typename CharT>
112 static Latin1Char*
Escape(JSContext * cx,const CharT * chars,uint32_t length,uint32_t * newLengthOut)113 Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut)
114 {
115     static const uint8_t shouldPassThrough[128] = {
116          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
117          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118          0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
119          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
120          1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
121          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
122          0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
123          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,       /*   pqrstuvwxyz{\}~  DEL */
124     };
125 
126     /* Take a first pass and see how big the result string will need to be. */
127     uint32_t newLength = length;
128     for (size_t i = 0; i < length; i++) {
129         char16_t ch = chars[i];
130         if (ch < 128 && shouldPassThrough[ch])
131             continue;
132 
133         /* The character will be encoded as %XX or %uXXXX. */
134         newLength += (ch < 256) ? 2 : 5;
135 
136         /*
137          * newlength is incremented by at most 5 on each iteration, so worst
138          * case newlength == length * 6. This can't overflow.
139          */
140         static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
141                       "newlength must not overflow");
142     }
143 
144     Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1);
145     if (!newChars)
146         return nullptr;
147 
148     static const char digits[] = "0123456789ABCDEF";
149 
150     size_t i, ni;
151     for (i = 0, ni = 0; i < length; i++) {
152         char16_t ch = chars[i];
153         if (ch < 128 && shouldPassThrough[ch]) {
154             newChars[ni++] = ch;
155         } else if (ch < 256) {
156             newChars[ni++] = '%';
157             newChars[ni++] = digits[ch >> 4];
158             newChars[ni++] = digits[ch & 0xF];
159         } else {
160             newChars[ni++] = '%';
161             newChars[ni++] = 'u';
162             newChars[ni++] = digits[ch >> 12];
163             newChars[ni++] = digits[(ch & 0xF00) >> 8];
164             newChars[ni++] = digits[(ch & 0xF0) >> 4];
165             newChars[ni++] = digits[ch & 0xF];
166         }
167     }
168     MOZ_ASSERT(ni == newLength);
169     newChars[newLength] = 0;
170 
171     *newLengthOut = newLength;
172     return newChars;
173 }
174 
175 static bool
str_escape(JSContext * cx,unsigned argc,Value * vp)176 str_escape(JSContext* cx, unsigned argc, Value* vp)
177 {
178     CallArgs args = CallArgsFromVp(argc, vp);
179 
180     JSLinearString* str = ArgToRootedString(cx, args, 0);
181     if (!str)
182         return false;
183 
184     ScopedJSFreePtr<Latin1Char> newChars;
185     uint32_t newLength = 0;  // initialize to silence GCC warning
186     if (str->hasLatin1Chars()) {
187         AutoCheckCannotGC nogc;
188         newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength);
189     } else {
190         AutoCheckCannotGC nogc;
191         newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength);
192     }
193 
194     if (!newChars)
195         return false;
196 
197     JSString* res = NewString<CanGC>(cx, newChars.get(), newLength);
198     if (!res)
199         return false;
200 
201     newChars.forget();
202     args.rval().setString(res);
203     return true;
204 }
205 
206 template <typename CharT>
207 static inline bool
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)208 Unhex4(const RangedPtr<const CharT> chars, char16_t* result)
209 {
210     char16_t a = chars[0],
211              b = chars[1],
212              c = chars[2],
213              d = chars[3];
214 
215     if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
216         return false;
217 
218     *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
219     return true;
220 }
221 
222 template <typename CharT>
223 static inline bool
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)224 Unhex2(const RangedPtr<const CharT> chars, char16_t* result)
225 {
226     char16_t a = chars[0],
227              b = chars[1];
228 
229     if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
230         return false;
231 
232     *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
233     return true;
234 }
235 
236 template <typename CharT>
237 static bool
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)238 Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars)
239 {
240     /*
241      * NB: use signed integers for length/index to allow simple length
242      * comparisons without unsigned-underflow hazards.
243      */
244     static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer");
245     int length = AssertedCast<int>(chars.length());
246 
247     /*
248      * Note that the spec algorithm has been optimized to avoid building
249      * a string in the case where no escapes are present.
250      */
251 
252     /* Step 4. */
253     int k = 0;
254     bool building = false;
255 
256     /* Step 5. */
257     while (k < length) {
258         /* Step 6. */
259         char16_t c = chars[k];
260 
261         /* Step 7. */
262         if (c != '%')
263             goto step_18;
264 
265         /* Step 8. */
266         if (k > length - 6)
267             goto step_14;
268 
269         /* Step 9. */
270         if (chars[k + 1] != 'u')
271             goto step_14;
272 
273 #define ENSURE_BUILDING                                      \
274         do {                                                 \
275             if (!building) {                                 \
276                 building = true;                             \
277                 if (!sb.reserve(length))                     \
278                     return false;                            \
279                 sb.infallibleAppend(chars.start().get(), k); \
280             }                                                \
281         } while(false);
282 
283         /* Step 10-13. */
284         if (Unhex4(chars.start() + k + 2, &c)) {
285             ENSURE_BUILDING;
286             k += 5;
287             goto step_18;
288         }
289 
290       step_14:
291         /* Step 14. */
292         if (k > length - 3)
293             goto step_18;
294 
295         /* Step 15-17. */
296         if (Unhex2(chars.start() + k + 1, &c)) {
297             ENSURE_BUILDING;
298             k += 2;
299         }
300 
301       step_18:
302         if (building && !sb.append(c))
303             return false;
304 
305         /* Step 19. */
306         k += 1;
307     }
308 
309     return true;
310 #undef ENSURE_BUILDING
311 }
312 
313 /* ES5 B.2.2 */
314 static bool
str_unescape(JSContext * cx,unsigned argc,Value * vp)315 str_unescape(JSContext* cx, unsigned argc, Value* vp)
316 {
317     CallArgs args = CallArgsFromVp(argc, vp);
318 
319     /* Step 1. */
320     RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
321     if (!str)
322         return false;
323 
324     /* Step 3. */
325     StringBuffer sb(cx);
326     if (str->hasTwoByteChars() && !sb.ensureTwoByteChars())
327         return false;
328 
329     if (str->hasLatin1Chars()) {
330         AutoCheckCannotGC nogc;
331         if (!Unescape(sb, str->latin1Range(nogc)))
332             return false;
333     } else {
334         AutoCheckCannotGC nogc;
335         if (!Unescape(sb, str->twoByteRange(nogc)))
336             return false;
337     }
338 
339     JSLinearString* result;
340     if (!sb.empty()) {
341         result = sb.finishString();
342         if (!result)
343             return false;
344     } else {
345         result = str;
346     }
347 
348     args.rval().setString(result);
349     return true;
350 }
351 
352 #if JS_HAS_UNEVAL
353 static bool
str_uneval(JSContext * cx,unsigned argc,Value * vp)354 str_uneval(JSContext* cx, unsigned argc, Value* vp)
355 {
356     CallArgs args = CallArgsFromVp(argc, vp);
357     JSString* str = ValueToSource(cx, args.get(0));
358     if (!str)
359         return false;
360 
361     args.rval().setString(str);
362     return true;
363 }
364 #endif
365 
366 static const JSFunctionSpec string_functions[] = {
367     JS_FN(js_escape_str,             str_escape,                1, JSPROP_RESOLVING),
368     JS_FN(js_unescape_str,           str_unescape,              1, JSPROP_RESOLVING),
369 #if JS_HAS_UNEVAL
370     JS_FN(js_uneval_str,             str_uneval,                1, JSPROP_RESOLVING),
371 #endif
372     JS_FN(js_decodeURI_str,          str_decodeURI,             1, JSPROP_RESOLVING),
373     JS_FN(js_encodeURI_str,          str_encodeURI,             1, JSPROP_RESOLVING),
374     JS_FN(js_decodeURIComponent_str, str_decodeURI_Component,   1, JSPROP_RESOLVING),
375     JS_FN(js_encodeURIComponent_str, str_encodeURI_Component,   1, JSPROP_RESOLVING),
376 
377     JS_FS_END
378 };
379 
380 static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
381 
382 static bool
str_enumerate(JSContext * cx,HandleObject obj)383 str_enumerate(JSContext* cx, HandleObject obj)
384 {
385     RootedString str(cx, obj->as<StringObject>().unbox());
386     RootedValue value(cx);
387     for (size_t i = 0, length = str->length(); i < length; i++) {
388         JSString* str1 = NewDependentString(cx, str, i, 1);
389         if (!str1)
390             return false;
391         value.setString(str1);
392         if (!DefineElement(cx, obj, i, value, nullptr, nullptr,
393                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
394         {
395             return false;
396         }
397     }
398 
399     return true;
400 }
401 
402 static bool
str_mayResolve(const JSAtomState &,jsid id,JSObject *)403 str_mayResolve(const JSAtomState&, jsid id, JSObject*)
404 {
405     // str_resolve ignores non-integer ids.
406     return JSID_IS_INT(id);
407 }
408 
409 static bool
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)410 str_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp)
411 {
412     if (!JSID_IS_INT(id))
413         return true;
414 
415     RootedString str(cx, obj->as<StringObject>().unbox());
416 
417     int32_t slot = JSID_TO_INT(id);
418     if ((size_t)slot < str->length()) {
419         JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
420         if (!str1)
421             return false;
422         RootedValue value(cx, StringValue(str1));
423         if (!DefineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
424                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
425         {
426             return false;
427         }
428         *resolvedp = true;
429     }
430     return true;
431 }
432 
433 const Class StringObject::class_ = {
434     js_String_str,
435     JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
436     JSCLASS_HAS_CACHED_PROTO(JSProto_String),
437     nullptr, /* addProperty */
438     nullptr, /* delProperty */
439     nullptr, /* getProperty */
440     nullptr, /* setProperty */
441     str_enumerate,
442     str_resolve,
443     str_mayResolve
444 };
445 
446 /*
447  * Returns a JSString * for the |this| value associated with 'call', or throws
448  * a TypeError if |this| is null or undefined.  This algorithm is the same as
449  * calling CheckObjectCoercible(this), then returning ToString(this), as all
450  * String.prototype.* methods do (other than toString and valueOf).
451  */
452 static MOZ_ALWAYS_INLINE JSString*
ThisToStringForStringProto(JSContext * cx,CallReceiver call)453 ThisToStringForStringProto(JSContext* cx, CallReceiver call)
454 {
455     JS_CHECK_RECURSION(cx, return nullptr);
456 
457     if (call.thisv().isString())
458         return call.thisv().toString();
459 
460     if (call.thisv().isObject()) {
461         RootedObject obj(cx, &call.thisv().toObject());
462         if (obj->is<StringObject>()) {
463             StringObject* nobj = &obj->as<StringObject>();
464             Rooted<jsid> id(cx, NameToId(cx->names().toString));
465             if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, str_toString)) {
466                 JSString* str = nobj->unbox();
467                 call.setThis(StringValue(str));
468                 return str;
469             }
470         }
471     } else if (call.thisv().isNullOrUndefined()) {
472         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
473                              call.thisv().isNull() ? "null" : "undefined", "object");
474         return nullptr;
475     }
476 
477     JSString* str = ToStringSlow<CanGC>(cx, call.thisv());
478     if (!str)
479         return nullptr;
480 
481     call.setThis(StringValue(str));
482     return str;
483 }
484 
485 MOZ_ALWAYS_INLINE bool
IsString(HandleValue v)486 IsString(HandleValue v)
487 {
488     return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
489 }
490 
491 #if JS_HAS_TOSOURCE
492 
493 MOZ_ALWAYS_INLINE bool
str_toSource_impl(JSContext * cx,const CallArgs & args)494 str_toSource_impl(JSContext* cx, const CallArgs& args)
495 {
496     MOZ_ASSERT(IsString(args.thisv()));
497 
498     Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
499     if (!str)
500         return false;
501 
502     str = QuoteString(cx, str, '"');
503     if (!str)
504         return false;
505 
506     StringBuffer sb(cx);
507     if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
508         return false;
509 
510     str = sb.finishString();
511     if (!str)
512         return false;
513     args.rval().setString(str);
514     return true;
515 }
516 
517 static bool
str_toSource(JSContext * cx,unsigned argc,Value * vp)518 str_toSource(JSContext* cx, unsigned argc, Value* vp)
519 {
520     CallArgs args = CallArgsFromVp(argc, vp);
521     return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
522 }
523 
524 #endif /* JS_HAS_TOSOURCE */
525 
526 MOZ_ALWAYS_INLINE bool
str_toString_impl(JSContext * cx,const CallArgs & args)527 str_toString_impl(JSContext* cx, const CallArgs& args)
528 {
529     MOZ_ASSERT(IsString(args.thisv()));
530 
531     args.rval().setString(args.thisv().isString()
532                               ? args.thisv().toString()
533                               : args.thisv().toObject().as<StringObject>().unbox());
534     return true;
535 }
536 
537 bool
str_toString(JSContext * cx,unsigned argc,Value * vp)538 js::str_toString(JSContext* cx, unsigned argc, Value* vp)
539 {
540     CallArgs args = CallArgsFromVp(argc, vp);
541     return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
542 }
543 
544 /*
545  * Java-like string native methods.
546  */
547 
548 JSString*
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)549 js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt)
550 {
551     MOZ_ASSERT(0 <= beginInt);
552     MOZ_ASSERT(0 <= lengthInt);
553     MOZ_ASSERT(uint32_t(beginInt) <= str->length());
554     MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
555 
556     uint32_t begin = beginInt;
557     uint32_t len = lengthInt;
558 
559     /*
560      * Optimization for one level deep ropes.
561      * This is common for the following pattern:
562      *
563      * while() {
564      *   text = text.substr(0, x) + "bla" + text.substr(x)
565      *   test.charCodeAt(x + 1)
566      * }
567      */
568     if (str->isRope()) {
569         JSRope* rope = &str->asRope();
570 
571         /* Substring is totally in leftChild of rope. */
572         if (begin + len <= rope->leftChild()->length())
573             return NewDependentString(cx, rope->leftChild(), begin, len);
574 
575         /* Substring is totally in rightChild of rope. */
576         if (begin >= rope->leftChild()->length()) {
577             begin -= rope->leftChild()->length();
578             return NewDependentString(cx, rope->rightChild(), begin, len);
579         }
580 
581         /*
582          * Requested substring is partly in the left and partly in right child.
583          * Create a rope of substrings for both childs.
584          */
585         MOZ_ASSERT(begin < rope->leftChild()->length() &&
586                    begin + len > rope->leftChild()->length());
587 
588         size_t lhsLength = rope->leftChild()->length() - begin;
589         size_t rhsLength = begin + len - rope->leftChild()->length();
590 
591         Rooted<JSRope*> ropeRoot(cx, rope);
592         RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
593         if (!lhs)
594             return nullptr;
595 
596         RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
597         if (!rhs)
598             return nullptr;
599 
600         return JSRope::new_<CanGC>(cx, lhs, rhs, len);
601     }
602 
603     return NewDependentString(cx, str, begin, len);
604 }
605 
606 template <typename CharT>
607 static JSString*
ToLowerCase(JSContext * cx,JSLinearString * str)608 ToLowerCase(JSContext* cx, JSLinearString* str)
609 {
610     // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
611     // is a Latin1 string, the output is also a Latin1 string.
612     UniquePtr<CharT[], JS::FreePolicy> newChars;
613     size_t length = str->length();
614     {
615         AutoCheckCannotGC nogc;
616         const CharT* chars = str->chars<CharT>(nogc);
617 
618         // Look for the first upper case character.
619         size_t i = 0;
620         for (; i < length; i++) {
621             char16_t c = chars[i];
622             if (unicode::CanLowerCase(c))
623                 break;
624         }
625 
626         // If all characters are lower case, return the input string.
627         if (i == length)
628             return str;
629 
630         newChars = cx->make_pod_array<CharT>(length + 1);
631         if (!newChars)
632             return nullptr;
633 
634         PodCopy(newChars.get(), chars, i);
635 
636         for (; i < length; i++) {
637             char16_t c = unicode::ToLowerCase(chars[i]);
638             MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
639             newChars[i] = c;
640         }
641 
642         newChars[length] = 0;
643     }
644 
645     JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
646     if (!res)
647         return nullptr;
648 
649     newChars.release();
650     return res;
651 }
652 
653 static inline bool
ToLowerCaseHelper(JSContext * cx,CallReceiver call)654 ToLowerCaseHelper(JSContext* cx, CallReceiver call)
655 {
656     RootedString str(cx, ThisToStringForStringProto(cx, call));
657     if (!str)
658         return false;
659 
660     JSLinearString* linear = str->ensureLinear(cx);
661     if (!linear)
662         return false;
663 
664     if (linear->hasLatin1Chars())
665         str = ToLowerCase<Latin1Char>(cx, linear);
666     else
667         str = ToLowerCase<char16_t>(cx, linear);
668     if (!str)
669         return false;
670 
671     call.rval().setString(str);
672     return true;
673 }
674 
675 bool
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)676 js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
677 {
678     return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
679 }
680 
681 static bool
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)682 str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
683 {
684     CallArgs args = CallArgsFromVp(argc, vp);
685 
686     /*
687      * Forcefully ignore the first (or any) argument and return toLowerCase(),
688      * ECMA has reserved that argument, presumably for defining the locale.
689      */
690     if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
691         RootedString str(cx, ThisToStringForStringProto(cx, args));
692         if (!str)
693             return false;
694 
695         RootedValue result(cx);
696         if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
697             return false;
698 
699         args.rval().set(result);
700         return true;
701     }
702 
703     return ToLowerCaseHelper(cx, args);
704 }
705 
706 template <typename DestChar, typename SrcChar>
707 static void
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t firstLowerCase,size_t length)708 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
709 {
710     MOZ_ASSERT(firstLowerCase < length);
711 
712     for (size_t i = 0; i < firstLowerCase; i++)
713         destChars[i] = srcChars[i];
714 
715     for (size_t i = firstLowerCase; i < length; i++) {
716         char16_t c = unicode::ToUpperCase(srcChars[i]);
717         MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
718         destChars[i] = c;
719     }
720 
721     destChars[length] = '\0';
722 }
723 
724 template <typename CharT>
725 static JSString*
ToUpperCase(JSContext * cx,JSLinearString * str)726 ToUpperCase(JSContext* cx, JSLinearString* str)
727 {
728     typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
729     typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
730 
731     mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
732     size_t length = str->length();
733     {
734         AutoCheckCannotGC nogc;
735         const CharT* chars = str->chars<CharT>(nogc);
736 
737         // Look for the first lower case character.
738         size_t i = 0;
739         for (; i < length; i++) {
740             char16_t c = chars[i];
741             if (unicode::CanUpperCase(c))
742                 break;
743         }
744 
745         // If all characters are upper case, return the input string.
746         if (i == length)
747             return str;
748 
749         // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
750         // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
751         // upper case characters are not in the Latin1 range.
752         bool resultIsLatin1;
753         if (IsSame<CharT, Latin1Char>::value) {
754             resultIsLatin1 = true;
755             for (size_t j = i; j < length; j++) {
756                 Latin1Char c = chars[j];
757                 if (c == 0xb5 || c == 0xff) {
758                     MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
759                     resultIsLatin1 = false;
760                     break;
761                 } else {
762                     MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
763                 }
764             }
765         } else {
766             resultIsLatin1 = false;
767         }
768 
769         if (resultIsLatin1) {
770             Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
771             if (!buf)
772                 return nullptr;
773 
774             ToUpperCaseImpl(buf.get(), chars, i, length);
775             newChars.construct<Latin1CharPtr>(Move(buf));
776         } else {
777             TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
778             if (!buf)
779                 return nullptr;
780 
781             ToUpperCaseImpl(buf.get(), chars, i, length);
782             newChars.construct<TwoByteCharPtr>(Move(buf));
783         }
784     }
785 
786     JSString* res;
787     if (newChars.constructed<Latin1CharPtr>()) {
788         res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
789         if (!res)
790             return nullptr;
791 
792         newChars.ref<Latin1CharPtr>().release();
793     } else {
794         res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
795         if (!res)
796             return nullptr;
797 
798         newChars.ref<TwoByteCharPtr>().release();
799     }
800 
801     return res;
802 }
803 
804 static bool
ToUpperCaseHelper(JSContext * cx,CallReceiver call)805 ToUpperCaseHelper(JSContext* cx, CallReceiver call)
806 {
807     RootedString str(cx, ThisToStringForStringProto(cx, call));
808     if (!str)
809         return false;
810 
811     JSLinearString* linear = str->ensureLinear(cx);
812     if (!linear)
813         return false;
814 
815     if (linear->hasLatin1Chars())
816         str = ToUpperCase<Latin1Char>(cx, linear);
817     else
818         str = ToUpperCase<char16_t>(cx, linear);
819     if (!str)
820         return false;
821 
822     call.rval().setString(str);
823     return true;
824 }
825 
826 bool
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)827 js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
828 {
829     return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
830 }
831 
832 static bool
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)833 str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
834 {
835     CallArgs args = CallArgsFromVp(argc, vp);
836 
837     /*
838      * Forcefully ignore the first (or any) argument and return toUpperCase(),
839      * ECMA has reserved that argument, presumably for defining the locale.
840      */
841     if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
842         RootedString str(cx, ThisToStringForStringProto(cx, args));
843         if (!str)
844             return false;
845 
846         RootedValue result(cx);
847         if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
848             return false;
849 
850         args.rval().set(result);
851         return true;
852     }
853 
854     return ToUpperCaseHelper(cx, args);
855 }
856 
857 #if !EXPOSE_INTL_API
858 static bool
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)859 str_localeCompare(JSContext* cx, unsigned argc, Value* vp)
860 {
861     CallArgs args = CallArgsFromVp(argc, vp);
862     RootedString str(cx, ThisToStringForStringProto(cx, args));
863     if (!str)
864         return false;
865 
866     RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
867     if (!thatStr)
868         return false;
869 
870     if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
871         RootedValue result(cx);
872         if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
873             return false;
874 
875         args.rval().set(result);
876         return true;
877     }
878 
879     int32_t result;
880     if (!CompareStrings(cx, str, thatStr, &result))
881         return false;
882 
883     args.rval().setInt32(result);
884     return true;
885 }
886 #endif
887 
888 #if EXPOSE_INTL_API
889 /* ES6 20140210 draft 21.1.3.12. */
890 static bool
str_normalize(JSContext * cx,unsigned argc,Value * vp)891 str_normalize(JSContext* cx, unsigned argc, Value* vp)
892 {
893     CallArgs args = CallArgsFromVp(argc, vp);
894 
895     // Steps 1-3.
896     RootedString str(cx, ThisToStringForStringProto(cx, args));
897     if (!str)
898         return false;
899 
900     // Step 4.
901     UNormalizationMode form;
902     if (!args.hasDefined(0)) {
903         form = UNORM_NFC;
904     } else {
905         // Steps 5-6.
906         RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
907         if (!formStr)
908             return false;
909 
910         // Step 7.
911         if (EqualStrings(formStr, cx->names().NFC)) {
912             form = UNORM_NFC;
913         } else if (EqualStrings(formStr, cx->names().NFD)) {
914             form = UNORM_NFD;
915         } else if (EqualStrings(formStr, cx->names().NFKC)) {
916             form = UNORM_NFKC;
917         } else if (EqualStrings(formStr, cx->names().NFKD)) {
918             form = UNORM_NFKD;
919         } else {
920             JS_ReportErrorNumber(cx, GetErrorMessage, nullptr,
921                                  JSMSG_INVALID_NORMALIZE_FORM);
922             return false;
923         }
924     }
925 
926     // Step 8.
927     AutoStableStringChars stableChars(cx);
928     if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
929         return false;
930 
931     static const size_t INLINE_CAPACITY = 32;
932 
933     const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().start().get());
934     int32_t srcLen = AssertedCast<int32_t>(str->length());
935     Vector<char16_t, INLINE_CAPACITY> chars(cx);
936     if (!chars.resize(INLINE_CAPACITY))
937         return false;
938 
939     UErrorCode status = U_ZERO_ERROR;
940     int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
941                                    Char16ToUChar(chars.begin()), INLINE_CAPACITY,
942                                    &status);
943     if (status == U_BUFFER_OVERFLOW_ERROR) {
944         if (!chars.resize(size))
945             return false;
946         status = U_ZERO_ERROR;
947 #ifdef DEBUG
948         int32_t finalSize =
949 #endif
950         unorm_normalize(srcChars, srcLen, form, 0,
951                         Char16ToUChar(chars.begin()), size,
952                         &status);
953         MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
954     }
955     if (U_FAILURE(status))
956         return false;
957 
958     JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
959     if (!ns)
960         return false;
961 
962     // Step 9.
963     args.rval().setString(ns);
964     return true;
965 }
966 #endif
967 
968 bool
str_charAt(JSContext * cx,unsigned argc,Value * vp)969 js::str_charAt(JSContext* cx, unsigned argc, Value* vp)
970 {
971     CallArgs args = CallArgsFromVp(argc, vp);
972 
973     RootedString str(cx);
974     size_t i;
975     if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
976         str = args.thisv().toString();
977         i = size_t(args[0].toInt32());
978         if (i >= str->length())
979             goto out_of_range;
980     } else {
981         str = ThisToStringForStringProto(cx, args);
982         if (!str)
983             return false;
984 
985         double d = 0.0;
986         if (args.length() > 0 && !ToInteger(cx, args[0], &d))
987             return false;
988 
989         if (d < 0 || str->length() <= d)
990             goto out_of_range;
991         i = size_t(d);
992     }
993 
994     str = cx->staticStrings().getUnitStringForElement(cx, str, i);
995     if (!str)
996         return false;
997     args.rval().setString(str);
998     return true;
999 
1000   out_of_range:
1001     args.rval().setString(cx->runtime()->emptyString);
1002     return true;
1003 }
1004 
1005 bool
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1006 js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res)
1007 {
1008     RootedString str(cx);
1009     size_t i;
1010     if (index.isInt32()) {
1011         i = index.toInt32();
1012         if (i >= string->length())
1013             goto out_of_range;
1014     } else {
1015         double d = 0.0;
1016         if (!ToInteger(cx, index, &d))
1017             return false;
1018         // check whether d is negative as size_t is unsigned
1019         if (d < 0 || string->length() <= d )
1020             goto out_of_range;
1021         i = size_t(d);
1022     }
1023     char16_t c;
1024     if (!string->getChar(cx, i , &c))
1025         return false;
1026     res.setInt32(c);
1027     return true;
1028 
1029 out_of_range:
1030     res.setNaN();
1031     return true;
1032 }
1033 
1034 bool
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1035 js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp)
1036 {
1037     CallArgs args = CallArgsFromVp(argc, vp);
1038     RootedString str(cx);
1039     RootedValue index(cx);
1040     if (args.thisv().isString()) {
1041         str = args.thisv().toString();
1042     } else {
1043         str = ThisToStringForStringProto(cx, args);
1044         if (!str)
1045             return false;
1046     }
1047     if (args.length() != 0)
1048         index = args[0];
1049     else
1050         index.setInt32(0);
1051 
1052     return js::str_charCodeAt_impl(cx, str, index, args.rval());
1053 }
1054 
1055 /*
1056  * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1057  * The patlen argument must be positive and no greater than sBMHPatLenMax.
1058  *
1059  * Return the index of pat in text, or -1 if not found.
1060  */
1061 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1062 static const uint32_t sBMHPatLenMax   = 255; /* skip table element is uint8_t */
1063 static const int      sBMHBadPattern  = -2;  /* return value if pat is not ISO-Latin-1 */
1064 
1065 template <typename TextChar, typename PatChar>
1066 static int
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1067 BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1068 {
1069     MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1070 
1071     uint8_t skip[sBMHCharSetSize];
1072     for (uint32_t i = 0; i < sBMHCharSetSize; i++)
1073         skip[i] = uint8_t(patLen);
1074 
1075     uint32_t patLast = patLen - 1;
1076     for (uint32_t i = 0; i < patLast; i++) {
1077         char16_t c = pat[i];
1078         if (c >= sBMHCharSetSize)
1079             return sBMHBadPattern;
1080         skip[c] = uint8_t(patLast - i);
1081     }
1082 
1083     for (uint32_t k = patLast; k < textLen; ) {
1084         for (uint32_t i = k, j = patLast; ; i--, j--) {
1085             if (text[i] != pat[j])
1086                 break;
1087             if (j == 0)
1088                 return static_cast<int>(i);  /* safe: max string size */
1089         }
1090 
1091         char16_t c = text[k];
1092         k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1093     }
1094     return -1;
1095 }
1096 
1097 template <typename TextChar, typename PatChar>
1098 struct MemCmp {
1099     typedef uint32_t Extent;
computeExtentMemCmp1100     static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) {
1101         return (patLen - 1) * sizeof(PatChar);
1102     }
matchMemCmp1103     static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1104         MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1105         return memcmp(p, t, extent) == 0;
1106     }
1107 };
1108 
1109 template <typename TextChar, typename PatChar>
1110 struct ManualCmp {
1111     typedef const PatChar* Extent;
computeExtentManualCmp1112     static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) {
1113         return pat + patLen;
1114     }
matchManualCmp1115     static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1116         for (; p != extent; ++p, ++t) {
1117             if (*p != *t)
1118                 return false;
1119         }
1120         return true;
1121     }
1122 };
1123 
1124 template <typename TextChar, typename PatChar>
1125 static const TextChar*
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1126 FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat)
1127 {
1128     const TextChar* textend = text + n;
1129     const TextChar* t = text;
1130 
1131     switch ((textend - t) & 7) {
1132         case 0: if (*t++ == pat) return t - 1;
1133         case 7: if (*t++ == pat) return t - 1;
1134         case 6: if (*t++ == pat) return t - 1;
1135         case 5: if (*t++ == pat) return t - 1;
1136         case 4: if (*t++ == pat) return t - 1;
1137         case 3: if (*t++ == pat) return t - 1;
1138         case 2: if (*t++ == pat) return t - 1;
1139         case 1: if (*t++ == pat) return t - 1;
1140     }
1141     while (textend != t) {
1142         if (t[0] == pat) return t;
1143         if (t[1] == pat) return t + 1;
1144         if (t[2] == pat) return t + 2;
1145         if (t[3] == pat) return t + 3;
1146         if (t[4] == pat) return t + 4;
1147         if (t[5] == pat) return t + 5;
1148         if (t[6] == pat) return t + 6;
1149         if (t[7] == pat) return t + 7;
1150         t += 8;
1151     }
1152     return nullptr;
1153 }
1154 
1155 static const char*
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1156 FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
1157 {
1158 #if  defined(__clang__)
1159     return FirstCharMatcherUnrolled<char, char>(text, n, pat);
1160 #else
1161     return reinterpret_cast<const char*>(memchr(text, pat, n));
1162 #endif
1163 }
1164 
1165 static const char16_t*
FirstCharMatcher16bit(const char16_t * text,uint32_t n,const char16_t pat)1166 FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat)
1167 {
1168 #if defined(XP_DARWIN) || defined(XP_WIN)
1169     /*
1170      * Performance of memchr is horrible in OSX. Windows is better,
1171      * but it is still better to use UnrolledMatcher.
1172      */
1173     return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat);
1174 #else
1175     /*
1176      * For linux the best performance is obtained by slightly hacking memchr.
1177      * memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t
1178      * in blocks of 8bit and use memchr.
1179      */
1180 
1181     const char* text8 = (const char*) text;
1182     const char* pat8 = reinterpret_cast<const char*>(&pat);
1183 
1184     MOZ_ASSERT(n < UINT32_MAX/2);
1185     n *= 2;
1186 
1187     uint32_t i = 0;
1188     while (i < n) {
1189         /* Find the first 8 bits of 16bit character in text. */
1190         const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]);
1191         if (pos8 == nullptr)
1192             return nullptr;
1193         i = static_cast<uint32_t>(pos8 - text8);
1194 
1195         /* Incorrect match if it matches the last 8 bits of 16bit char. */
1196         if (i % 2 != 0) {
1197             i++;
1198             continue;
1199         }
1200 
1201         /* Test if last 8 bits match last 8 bits of 16bit char. */
1202         if (pat8[1] == text8[i + 1])
1203             return (text + (i/2));
1204 
1205         i += 2;
1206     }
1207     return nullptr;
1208 #endif
1209 }
1210 
1211 template <class InnerMatch, typename TextChar, typename PatChar>
1212 static int
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1213 Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
1214 {
1215     const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
1216 
1217     uint32_t i = 0;
1218     uint32_t n = textlen - patlen + 1;
1219     while (i < n) {
1220         const TextChar* pos;
1221 
1222         if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
1223             pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]);
1224         else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
1225             pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
1226         else
1227             pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
1228 
1229         if (pos == nullptr)
1230             return -1;
1231 
1232         i = static_cast<uint32_t>(pos - text);
1233         if (InnerMatch::match(pat + 1, text + i + 1, extent))
1234             return i;
1235 
1236         i += 1;
1237      }
1238      return -1;
1239  }
1240 
1241 
1242 template <typename TextChar, typename PatChar>
1243 static MOZ_ALWAYS_INLINE int
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1244 StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1245 {
1246     if (patLen == 0)
1247         return 0;
1248     if (textLen < patLen)
1249         return -1;
1250 
1251 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1252     /*
1253      * Given enough registers, the unrolled loop below is faster than the
1254      * following loop. 32-bit x86 does not have enough registers.
1255      */
1256     if (patLen == 1) {
1257         const PatChar p0 = *pat;
1258         const TextChar* end = text + textLen;
1259         for (const TextChar* c = text; c != end; ++c) {
1260             if (*c == p0)
1261                 return c - text;
1262         }
1263         return -1;
1264     }
1265 #endif
1266 
1267     /*
1268      * If the text or pattern string is short, BMH will be more expensive than
1269      * the basic linear scan due to initialization cost and a more complex loop
1270      * body. While the correct threshold is input-dependent, we can make a few
1271      * conservative observations:
1272      *  - When |textLen| is "big enough", the initialization time will be
1273      *    proportionally small, so the worst-case slowdown is minimized.
1274      *  - When |patLen| is "too small", even the best case for BMH will be
1275      *    slower than a simple scan for large |textLen| due to the more complex
1276      *    loop body of BMH.
1277      * From this, the values for "big enough" and "too small" are determined
1278      * empirically. See bug 526348.
1279      */
1280     if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1281         int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1282         if (index != sBMHBadPattern)
1283             return index;
1284     }
1285 
1286     /*
1287      * For big patterns with large potential overlap we want the SIMD-optimized
1288      * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1289      * use memcmp if one of the strings is TwoByte and the other is Latin1.
1290      *
1291      * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1292      */
1293     return
1294 #if !defined(__linux__)
1295         (patLen > 128 && IsSame<TextChar, PatChar>::value)
1296             ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
1297             :
1298 #endif
1299               Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen);
1300 }
1301 
1302 static int32_t
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1303 StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0)
1304 {
1305     MOZ_ASSERT(start <= text->length());
1306     uint32_t textLen = text->length() - start;
1307     uint32_t patLen = pat->length();
1308 
1309     int match;
1310     AutoCheckCannotGC nogc;
1311     if (text->hasLatin1Chars()) {
1312         const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1313         if (pat->hasLatin1Chars())
1314             match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1315         else
1316             match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1317     } else {
1318         const char16_t* textChars = text->twoByteChars(nogc) + start;
1319         if (pat->hasLatin1Chars())
1320             match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1321         else
1322             match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1323     }
1324 
1325     return (match == -1) ? -1 : start + match;
1326 }
1327 
1328 static const size_t sRopeMatchThresholdRatioLog2 = 5;
1329 
1330 bool
StringHasPattern(JSLinearString * text,const char16_t * pat,uint32_t patLen)1331 js::StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patLen)
1332 {
1333     AutoCheckCannotGC nogc;
1334     return text->hasLatin1Chars()
1335            ? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1
1336            : StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1;
1337 }
1338 
1339 int
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1340 js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start)
1341 {
1342     return StringMatch(text, pat, start);
1343 }
1344 
1345 // When an algorithm does not need a string represented as a single linear
1346 // array of characters, this range utility may be used to traverse the string a
1347 // sequence of linear arrays of characters. This avoids flattening ropes.
1348 class StringSegmentRange
1349 {
1350     // If malloc() shows up in any profiles from this vector, we can add a new
1351     // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1352     Rooted<StringVector> stack;
1353     RootedLinearString cur;
1354 
settle(JSString * str)1355     bool settle(JSString* str) {
1356         while (str->isRope()) {
1357             JSRope& rope = str->asRope();
1358             if (!stack.append(rope.rightChild()))
1359                 return false;
1360             str = rope.leftChild();
1361         }
1362         cur = &str->asLinear();
1363         return true;
1364     }
1365 
1366   public:
StringSegmentRange(JSContext * cx)1367     explicit StringSegmentRange(JSContext* cx)
1368       : stack(cx, StringVector(cx)), cur(cx)
1369     {}
1370 
init(JSString * str)1371     MOZ_WARN_UNUSED_RESULT bool init(JSString* str) {
1372         MOZ_ASSERT(stack.empty());
1373         return settle(str);
1374     }
1375 
empty() const1376     bool empty() const {
1377         return cur == nullptr;
1378     }
1379 
front() const1380     JSLinearString* front() const {
1381         MOZ_ASSERT(!cur->isRope());
1382         return cur;
1383     }
1384 
popFront()1385     MOZ_WARN_UNUSED_RESULT bool popFront() {
1386         MOZ_ASSERT(!empty());
1387         if (stack.empty()) {
1388             cur = nullptr;
1389             return true;
1390         }
1391         return settle(stack.popCopy());
1392     }
1393 };
1394 
1395 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1396 
1397 template <typename TextChar, typename PatChar>
1398 static int
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1399 RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings,
1400               const PatChar* pat, size_t patLen)
1401 {
1402     /* Absolute offset from the beginning of the logical text string. */
1403     int pos = 0;
1404 
1405     for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) {
1406         /* Try to find a match within 'outer'. */
1407         JSLinearString* outer = *outerp;
1408         const TextChar* chars = outer->chars<TextChar>(nogc);
1409         size_t len = outer->length();
1410         int matchResult = StringMatch(chars, len, pat, patLen);
1411         if (matchResult != -1) {
1412             /* Matched! */
1413             return pos + matchResult;
1414         }
1415 
1416         /* Try to find a match starting in 'outer' and running into other nodes. */
1417         const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1418         const TextChar* const textend = chars + len;
1419         const PatChar p0 = *pat;
1420         const PatChar* const p1 = pat + 1;
1421         const PatChar* const patend = pat + patLen;
1422         for (const TextChar* t = text; t != textend; ) {
1423             if (*t++ != p0)
1424                 continue;
1425 
1426             JSLinearString** innerp = outerp;
1427             const TextChar* ttend = textend;
1428             const TextChar* tt = t;
1429             for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1430                 while (tt == ttend) {
1431                     if (++innerp == strings.end())
1432                         return -1;
1433 
1434                     JSLinearString* inner = *innerp;
1435                     tt = inner->chars<TextChar>(nogc);
1436                     ttend = tt + inner->length();
1437                 }
1438                 if (*pp != *tt)
1439                     goto break_continue;
1440             }
1441 
1442             /* Matched! */
1443             return pos + (t - chars) - 1;  /* -1 because of *t++ above */
1444 
1445           break_continue:;
1446         }
1447 
1448         pos += len;
1449     }
1450 
1451     return -1;
1452 }
1453 
1454 /*
1455  * RopeMatch takes the text to search and the pattern to search for in the text.
1456  * RopeMatch returns false on OOM and otherwise returns the match index through
1457  * the 'match' outparam (-1 for not found).
1458  */
1459 static bool
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)1460 RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
1461 {
1462     uint32_t patLen = pat->length();
1463     if (patLen == 0) {
1464         *match = 0;
1465         return true;
1466     }
1467     if (text->length() < patLen) {
1468         *match = -1;
1469         return true;
1470     }
1471 
1472     /*
1473      * List of leaf nodes in the rope. If we run out of memory when trying to
1474      * append to this list, we can still fall back to StringMatch, so use the
1475      * system allocator so we don't report OOM in that case.
1476      */
1477     LinearStringVector strings;
1478 
1479     /*
1480      * We don't want to do rope matching if there is a poor node-to-char ratio,
1481      * since this means spending a lot of time in the match loop below. We also
1482      * need to build the list of leaf nodes. Do both here: iterate over the
1483      * nodes so long as there are not too many.
1484      *
1485      * We also don't use rope matching if the rope contains both Latin1 and
1486      * TwoByte nodes, to simplify the match algorithm.
1487      */
1488     {
1489         size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1490         StringSegmentRange r(cx);
1491         if (!r.init(text))
1492             return false;
1493 
1494         bool textIsLatin1 = text->hasLatin1Chars();
1495         while (!r.empty()) {
1496             if (threshold-- == 0 ||
1497                 r.front()->hasLatin1Chars() != textIsLatin1 ||
1498                 !strings.append(r.front()))
1499             {
1500                 JSLinearString* linear = text->ensureLinear(cx);
1501                 if (!linear)
1502                     return false;
1503 
1504                 *match = StringMatch(linear, pat);
1505                 return true;
1506             }
1507             if (!r.popFront())
1508                 return false;
1509         }
1510     }
1511 
1512     AutoCheckCannotGC nogc;
1513     if (text->hasLatin1Chars()) {
1514         if (pat->hasLatin1Chars())
1515             *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen);
1516         else
1517             *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen);
1518     } else {
1519         if (pat->hasLatin1Chars())
1520             *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), patLen);
1521         else
1522             *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), patLen);
1523     }
1524 
1525     return true;
1526 }
1527 
1528 /* ES6 draft rc4 21.1.3.7. */
1529 static bool
str_includes(JSContext * cx,unsigned argc,Value * vp)1530 str_includes(JSContext* cx, unsigned argc, Value* vp)
1531 {
1532     CallArgs args = CallArgsFromVp(argc, vp);
1533 
1534     // Steps 1, 2, and 3
1535     RootedString str(cx, ThisToStringForStringProto(cx, args));
1536     if (!str)
1537         return false;
1538 
1539     // Steps 4 and 5
1540     bool isRegExp;
1541     if (!IsRegExp(cx, args.get(0), &isRegExp))
1542         return false;
1543 
1544     // Step 6
1545     if (isRegExp) {
1546         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1547                              "first", "", "Regular Expression");
1548         return false;
1549     }
1550 
1551     // Steps 7 and 8
1552     RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1553     if (!searchStr)
1554         return false;
1555 
1556     // Steps 9 and 10
1557     uint32_t pos = 0;
1558     if (args.hasDefined(1)) {
1559         if (args[1].isInt32()) {
1560             int i = args[1].toInt32();
1561             pos = (i < 0) ? 0U : uint32_t(i);
1562         } else {
1563             double d;
1564             if (!ToInteger(cx, args[1], &d))
1565                 return false;
1566             pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1567         }
1568     }
1569 
1570     // Step 11
1571     uint32_t textLen = str->length();
1572 
1573     // Step 12
1574     uint32_t start = Min(Max(pos, 0U), textLen);
1575 
1576     // Steps 13 and 14
1577     JSLinearString* text = str->ensureLinear(cx);
1578     if (!text)
1579         return false;
1580 
1581     args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
1582     return true;
1583 }
1584 
1585 /* TODO: remove String.prototype.contains (bug 1103588) */
1586 static bool
str_contains(JSContext * cx,unsigned argc,Value * vp)1587 str_contains(JSContext *cx, unsigned argc, Value *vp)
1588 {
1589 #ifndef RELEASE_BUILD
1590     CallArgs args = CallArgsFromVp(argc, vp);
1591     RootedObject callee(cx, &args.callee());
1592     if (!GlobalObject::warnOnceAboutStringContains(cx, callee))
1593         return false;
1594 #endif
1595     return str_includes(cx, argc, vp);
1596 }
1597 
1598 /* ES6 20120927 draft 15.5.4.7. */
1599 bool
str_indexOf(JSContext * cx,unsigned argc,Value * vp)1600 js::str_indexOf(JSContext* cx, unsigned argc, Value* vp)
1601 {
1602     CallArgs args = CallArgsFromVp(argc, vp);
1603 
1604     // Steps 1, 2, and 3
1605     RootedString str(cx, ThisToStringForStringProto(cx, args));
1606     if (!str)
1607         return false;
1608 
1609     // Steps 4 and 5
1610     RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1611     if (!searchStr)
1612         return false;
1613 
1614     // Steps 6 and 7
1615     uint32_t pos = 0;
1616     if (args.hasDefined(1)) {
1617         if (args[1].isInt32()) {
1618             int i = args[1].toInt32();
1619             pos = (i < 0) ? 0U : uint32_t(i);
1620         } else {
1621             double d;
1622             if (!ToInteger(cx, args[1], &d))
1623                 return false;
1624             pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1625         }
1626     }
1627 
1628    // Step 8
1629     uint32_t textLen = str->length();
1630 
1631     // Step 9
1632     uint32_t start = Min(Max(pos, 0U), textLen);
1633 
1634     // Steps 10 and 11
1635     JSLinearString* text = str->ensureLinear(cx);
1636     if (!text)
1637         return false;
1638 
1639     args.rval().setInt32(StringMatch(text, searchStr, start));
1640     return true;
1641 }
1642 
1643 template <typename TextChar, typename PatChar>
1644 static int32_t
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)1645 LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
1646                 size_t start)
1647 {
1648     MOZ_ASSERT(patLen > 0);
1649     MOZ_ASSERT(patLen <= textLen);
1650     MOZ_ASSERT(start <= textLen - patLen);
1651 
1652     const PatChar p0 = *pat;
1653     const PatChar* patNext = pat + 1;
1654     const PatChar* patEnd = pat + patLen;
1655 
1656     for (const TextChar* t = text + start; t >= text; --t) {
1657         if (*t == p0) {
1658             const TextChar* t1 = t + 1;
1659             for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
1660                 if (*t1 != *p1)
1661                     goto break_continue;
1662             }
1663 
1664             return static_cast<int32_t>(t - text);
1665         }
1666       break_continue:;
1667     }
1668 
1669     return -1;
1670 }
1671 
1672 bool
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)1673 js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
1674 {
1675     CallArgs args = CallArgsFromVp(argc, vp);
1676     RootedString textstr(cx, ThisToStringForStringProto(cx, args));
1677     if (!textstr)
1678         return false;
1679 
1680     RootedLinearString pat(cx, ArgToRootedString(cx, args, 0));
1681     if (!pat)
1682         return false;
1683 
1684     size_t textLen = textstr->length();
1685     size_t patLen = pat->length();
1686     int start = textLen - patLen; // Start searching here
1687     if (start < 0) {
1688         args.rval().setInt32(-1);
1689         return true;
1690     }
1691 
1692     if (args.hasDefined(1)) {
1693         if (args[1].isInt32()) {
1694             int i = args[1].toInt32();
1695             if (i <= 0)
1696                 start = 0;
1697             else if (i < start)
1698                 start = i;
1699         } else {
1700             double d;
1701             if (!ToNumber(cx, args[1], &d))
1702                 return false;
1703             if (!IsNaN(d)) {
1704                 d = JS::ToInteger(d);
1705                 if (d <= 0)
1706                     start = 0;
1707                 else if (d < start)
1708                     start = int(d);
1709             }
1710         }
1711     }
1712 
1713     if (patLen == 0) {
1714         args.rval().setInt32(start);
1715         return true;
1716     }
1717 
1718     JSLinearString* text = textstr->ensureLinear(cx);
1719     if (!text)
1720         return false;
1721 
1722     int32_t res;
1723     AutoCheckCannotGC nogc;
1724     if (text->hasLatin1Chars()) {
1725         const Latin1Char* textChars = text->latin1Chars(nogc);
1726         if (pat->hasLatin1Chars())
1727             res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1728         else
1729             res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1730     } else {
1731         const char16_t* textChars = text->twoByteChars(nogc);
1732         if (pat->hasLatin1Chars())
1733             res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1734         else
1735             res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1736     }
1737 
1738     args.rval().setInt32(res);
1739     return true;
1740 }
1741 
1742 bool
HasSubstringAt(JSLinearString * text,JSLinearString * pat,size_t start)1743 js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
1744 {
1745     MOZ_ASSERT(start + pat->length() <= text->length());
1746 
1747     size_t patLen = pat->length();
1748 
1749     AutoCheckCannotGC nogc;
1750     if (text->hasLatin1Chars()) {
1751         const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1752         if (pat->hasLatin1Chars())
1753             return PodEqual(textChars, pat->latin1Chars(nogc), patLen);
1754 
1755         return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1756     }
1757 
1758     const char16_t* textChars = text->twoByteChars(nogc) + start;
1759     if (pat->hasTwoByteChars())
1760         return PodEqual(textChars, pat->twoByteChars(nogc), patLen);
1761 
1762     return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1763 }
1764 
1765 /* ES6 draft rc3 21.1.3.18. */
1766 bool
str_startsWith(JSContext * cx,unsigned argc,Value * vp)1767 js::str_startsWith(JSContext* cx, unsigned argc, Value* vp)
1768 {
1769     CallArgs args = CallArgsFromVp(argc, vp);
1770 
1771     // Steps 1, 2, and 3
1772     RootedString str(cx, ThisToStringForStringProto(cx, args));
1773     if (!str)
1774         return false;
1775 
1776     // Steps 4 and 5
1777     bool isRegExp;
1778     if (!IsRegExp(cx, args.get(0), &isRegExp))
1779         return false;
1780 
1781     // Step 6
1782     if (isRegExp) {
1783         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1784                              "first", "", "Regular Expression");
1785         return false;
1786     }
1787 
1788     // Steps 7 and 8
1789     RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1790     if (!searchStr)
1791         return false;
1792 
1793     // Steps 9 and 10
1794     uint32_t pos = 0;
1795     if (args.hasDefined(1)) {
1796         if (args[1].isInt32()) {
1797             int i = args[1].toInt32();
1798             pos = (i < 0) ? 0U : uint32_t(i);
1799         } else {
1800             double d;
1801             if (!ToInteger(cx, args[1], &d))
1802                 return false;
1803             pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1804         }
1805     }
1806 
1807     // Step 11
1808     uint32_t textLen = str->length();
1809 
1810     // Step 12
1811     uint32_t start = Min(Max(pos, 0U), textLen);
1812 
1813     // Step 13
1814     uint32_t searchLen = searchStr->length();
1815 
1816     // Step 14
1817     if (searchLen + start < searchLen || searchLen + start > textLen) {
1818         args.rval().setBoolean(false);
1819         return true;
1820     }
1821 
1822     // Steps 15 and 16
1823     JSLinearString* text = str->ensureLinear(cx);
1824     if (!text)
1825         return false;
1826 
1827     args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1828     return true;
1829 }
1830 
1831 /* ES6 draft rc3 21.1.3.6. */
1832 static bool
str_endsWith(JSContext * cx,unsigned argc,Value * vp)1833 str_endsWith(JSContext* cx, unsigned argc, Value* vp)
1834 {
1835     CallArgs args = CallArgsFromVp(argc, vp);
1836 
1837     // Steps 1, 2, and 3
1838     RootedString str(cx, ThisToStringForStringProto(cx, args));
1839     if (!str)
1840         return false;
1841 
1842     // Steps 4 and 5
1843     bool isRegExp;
1844     if (!IsRegExp(cx, args.get(0), &isRegExp))
1845         return false;
1846 
1847     // Step 6
1848     if (isRegExp) {
1849         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1850                              "first", "", "Regular Expression");
1851         return false;
1852     }
1853 
1854     // Steps 7 and 8
1855     RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1856     if (!searchStr)
1857         return false;
1858 
1859     // Step 9
1860     uint32_t textLen = str->length();
1861 
1862     // Steps 10 and 11
1863     uint32_t pos = textLen;
1864     if (args.hasDefined(1)) {
1865         if (args[1].isInt32()) {
1866             int i = args[1].toInt32();
1867             pos = (i < 0) ? 0U : uint32_t(i);
1868         } else {
1869             double d;
1870             if (!ToInteger(cx, args[1], &d))
1871                 return false;
1872             pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1873         }
1874     }
1875 
1876     // Step 12
1877     uint32_t end = Min(Max(pos, 0U), textLen);
1878 
1879     // Step 13
1880     uint32_t searchLen = searchStr->length();
1881 
1882     // Step 15 (reordered)
1883     if (searchLen > end) {
1884         args.rval().setBoolean(false);
1885         return true;
1886     }
1887 
1888     // Step 14
1889     uint32_t start = end - searchLen;
1890 
1891     // Steps 16 and 17
1892     JSLinearString* text = str->ensureLinear(cx);
1893     if (!text)
1894         return false;
1895 
1896     args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1897     return true;
1898 }
1899 
1900 template <typename CharT>
1901 static void
TrimString(const CharT * chars,bool trimLeft,bool trimRight,size_t length,size_t * pBegin,size_t * pEnd)1902 TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length,
1903            size_t* pBegin, size_t* pEnd)
1904 {
1905     size_t begin = 0, end = length;
1906 
1907     if (trimLeft) {
1908         while (begin < length && unicode::IsSpace(chars[begin]))
1909             ++begin;
1910     }
1911 
1912     if (trimRight) {
1913         while (end > begin && unicode::IsSpace(chars[end - 1]))
1914             --end;
1915     }
1916 
1917     *pBegin = begin;
1918     *pEnd = end;
1919 }
1920 
1921 static bool
TrimString(JSContext * cx,Value * vp,bool trimLeft,bool trimRight)1922 TrimString(JSContext* cx, Value* vp, bool trimLeft, bool trimRight)
1923 {
1924     CallReceiver call = CallReceiverFromVp(vp);
1925     RootedString str(cx, ThisToStringForStringProto(cx, call));
1926     if (!str)
1927         return false;
1928 
1929     JSLinearString* linear = str->ensureLinear(cx);
1930     if (!linear)
1931         return false;
1932 
1933     size_t length = linear->length();
1934     size_t begin, end;
1935     if (linear->hasLatin1Chars()) {
1936         AutoCheckCannotGC nogc;
1937         TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end);
1938     } else {
1939         AutoCheckCannotGC nogc;
1940         TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end);
1941     }
1942 
1943     str = NewDependentString(cx, str, begin, end - begin);
1944     if (!str)
1945         return false;
1946 
1947     call.rval().setString(str);
1948     return true;
1949 }
1950 
1951 static bool
str_trim(JSContext * cx,unsigned argc,Value * vp)1952 str_trim(JSContext* cx, unsigned argc, Value* vp)
1953 {
1954     return TrimString(cx, vp, true, true);
1955 }
1956 
1957 static bool
str_trimLeft(JSContext * cx,unsigned argc,Value * vp)1958 str_trimLeft(JSContext* cx, unsigned argc, Value* vp)
1959 {
1960     return TrimString(cx, vp, true, false);
1961 }
1962 
1963 static bool
str_trimRight(JSContext * cx,unsigned argc,Value * vp)1964 str_trimRight(JSContext* cx, unsigned argc, Value* vp)
1965 {
1966     return TrimString(cx, vp, false, true);
1967 }
1968 
1969 /*
1970  * Perl-inspired string functions.
1971  */
1972 
1973 namespace {
1974 
1975 /* Result of a successfully performed flat match. */
1976 class FlatMatch
1977 {
1978     RootedAtom pat_;
1979     int32_t match_;
1980 
1981     friend class StringRegExpGuard;
1982 
1983   public:
FlatMatch(JSContext * cx)1984     explicit FlatMatch(JSContext* cx) : pat_(cx) {}
pattern() const1985     JSLinearString* pattern() const { return pat_; }
patternLength() const1986     size_t patternLength() const { return pat_->length(); }
1987 
1988     /*
1989      * Note: The match is -1 when the match is performed successfully,
1990      * but no match is found.
1991      */
match() const1992     int32_t match() const { return match_; }
1993 };
1994 
1995 } /* anonymous namespace */
1996 
1997 static inline bool
IsRegExpMetaChar(char16_t c)1998 IsRegExpMetaChar(char16_t c)
1999 {
2000     switch (c) {
2001       /* Taken from the PatternCharacter production in 15.10.1. */
2002       case '^': case '$': case '\\': case '.': case '*': case '+':
2003       case '?': case '(': case ')': case '[': case ']': case '{':
2004       case '}': case '|':
2005         return true;
2006       default:
2007         return false;
2008     }
2009 }
2010 
2011 template <typename CharT>
2012 bool
HasRegExpMetaChars(const CharT * chars,size_t length)2013 js::HasRegExpMetaChars(const CharT* chars, size_t length)
2014 {
2015     for (size_t i = 0; i < length; ++i) {
2016         if (IsRegExpMetaChar(chars[i]))
2017             return true;
2018     }
2019     return false;
2020 }
2021 
2022 template bool
2023 js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars, size_t length);
2024 
2025 template bool
2026 js::HasRegExpMetaChars<char16_t>(const char16_t* chars, size_t length);
2027 
2028 bool
StringHasRegExpMetaChars(JSLinearString * str)2029 js::StringHasRegExpMetaChars(JSLinearString* str)
2030 {
2031     AutoCheckCannotGC nogc;
2032     if (str->hasLatin1Chars())
2033         return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
2034 
2035     return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
2036 }
2037 
2038 namespace {
2039 
2040 /*
2041  * StringRegExpGuard factors logic out of String regexp operations.
2042  *
2043  * |optarg| indicates in which argument position RegExp flags will be found, if
2044  * present. This is a Mozilla extension and not part of any ECMA spec.
2045  */
2046 class MOZ_STACK_CLASS StringRegExpGuard
2047 {
2048     RegExpGuard re_;
2049     FlatMatch   fm;
2050     RootedObject obj_;
2051 
2052     /*
2053      * Upper bound on the number of characters we are willing to potentially
2054      * waste on searching for RegExp meta-characters.
2055      */
2056     static const size_t MAX_FLAT_PAT_LEN = 256;
2057 
2058     template <typename CharT>
2059     static bool
flattenPattern(StringBuffer & sb,const CharT * chars,size_t len)2060     flattenPattern(StringBuffer& sb, const CharT* chars, size_t len)
2061     {
2062         static const char ESCAPE_CHAR = '\\';
2063         for (const CharT* it = chars; it < chars + len; ++it) {
2064             if (IsRegExpMetaChar(*it)) {
2065                 if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
2066                     return false;
2067             } else {
2068                 if (!sb.append(*it))
2069                     return false;
2070             }
2071         }
2072         return true;
2073     }
2074 
2075     static JSAtom*
flattenPattern(JSContext * cx,JSAtom * pat)2076     flattenPattern(JSContext* cx, JSAtom* pat)
2077     {
2078         StringBuffer sb(cx);
2079         if (!sb.reserve(pat->length()))
2080             return nullptr;
2081 
2082         if (pat->hasLatin1Chars()) {
2083             AutoCheckCannotGC nogc;
2084             if (!flattenPattern(sb, pat->latin1Chars(nogc), pat->length()))
2085                 return nullptr;
2086         } else {
2087             AutoCheckCannotGC nogc;
2088             if (!flattenPattern(sb, pat->twoByteChars(nogc), pat->length()))
2089                 return nullptr;
2090         }
2091 
2092         return sb.finishAtom();
2093     }
2094 
2095   public:
StringRegExpGuard(JSContext * cx)2096     explicit StringRegExpGuard(JSContext* cx)
2097       : re_(cx), fm(cx), obj_(cx)
2098     { }
2099 
2100     /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
init(JSContext * cx,const CallArgs & args,bool convertVoid=false)2101     bool init(JSContext* cx, const CallArgs& args, bool convertVoid = false)
2102     {
2103         if (args.length() != 0) {
2104             ESClassValue cls;
2105             if (!GetClassOfValue(cx, args[0], &cls))
2106                 return false;
2107 
2108             if (cls == ESClass_RegExp)
2109                 return initRegExp(cx, &args[0].toObject());
2110         }
2111 
2112         if (convertVoid && !args.hasDefined(0)) {
2113             fm.pat_ = cx->runtime()->emptyString;
2114             return true;
2115         }
2116 
2117         JSString* arg = ArgToRootedString(cx, args, 0);
2118         if (!arg)
2119             return false;
2120 
2121         fm.pat_ = AtomizeString(cx, arg);
2122         if (!fm.pat_)
2123             return false;
2124 
2125         return true;
2126     }
2127 
initRegExp(JSContext * cx,JSObject * regexp)2128     bool initRegExp(JSContext* cx, JSObject* regexp) {
2129         obj_ = regexp;
2130         return RegExpToShared(cx, obj_, &re_);
2131     }
2132 
init(JSContext * cx,HandleString pattern)2133     bool init(JSContext* cx, HandleString pattern) {
2134         fm.pat_ = AtomizeString(cx, pattern);
2135         if (!fm.pat_)
2136             return false;
2137         return true;
2138     }
2139 
2140     /*
2141      * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2142      * the pattern string, or a lengthy pattern string can thwart this process.
2143      *
2144      * |checkMetaChars| looks for regexp metachars in the pattern string.
2145      *
2146      * Return whether flat matching could be used.
2147      *
2148      * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2149      * cx->isExceptionPending().
2150      */
2151     const FlatMatch*
tryFlatMatch(JSContext * cx,JSString * text,unsigned optarg,unsigned argc,bool checkMetaChars=true)2152     tryFlatMatch(JSContext* cx, JSString* text, unsigned optarg, unsigned argc,
2153                  bool checkMetaChars = true)
2154     {
2155         if (re_.initialized())
2156             return nullptr;
2157 
2158         if (optarg < argc)
2159             return nullptr;
2160 
2161         size_t patLen = fm.pat_->length();
2162         if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
2163             return nullptr;
2164 
2165         /*
2166          * |text| could be a rope, so we want to avoid flattening it for as
2167          * long as possible.
2168          */
2169         if (text->isRope()) {
2170             if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
2171                 return nullptr;
2172         } else {
2173             fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
2174         }
2175 
2176         return &fm;
2177     }
2178 
2179     /* If the pattern is not already a regular expression, make it so. */
normalizeRegExp(JSContext * cx,bool flat,unsigned optarg,const CallArgs & args)2180     bool normalizeRegExp(JSContext* cx, bool flat, unsigned optarg, const CallArgs& args)
2181     {
2182         if (re_.initialized())
2183             return true;
2184 
2185         /* Build RegExp from pattern string. */
2186         RootedString opt(cx);
2187         if (optarg < args.length()) {
2188             if (JSScript* script = cx->currentScript()) {
2189                 const char* filename = script->filename();
2190                 cx->compartment()->addTelemetry(filename, JSCompartment::DeprecatedFlagsArgument);
2191             }
2192 
2193             if (!cx->compartment()->warnedAboutFlagsArgument) {
2194                 if (!JS_ReportErrorFlagsAndNumber(cx, JSREPORT_WARNING, GetErrorMessage, nullptr,
2195                                                   JSMSG_DEPRECATED_FLAGS_ARG))
2196                     return false;
2197                 cx->compartment()->warnedAboutFlagsArgument = true;
2198             }
2199 
2200             opt = ToString<CanGC>(cx, args[optarg]);
2201             if (!opt)
2202                 return false;
2203         } else {
2204             opt = nullptr;
2205         }
2206 
2207         Rooted<JSAtom*> pat(cx);
2208         if (flat) {
2209             pat = flattenPattern(cx, fm.pat_);
2210             if (!pat)
2211                 return false;
2212         } else {
2213             pat = fm.pat_;
2214         }
2215         MOZ_ASSERT(pat);
2216 
2217         return cx->compartment()->regExps.get(cx, pat, opt, &re_);
2218     }
2219 
zeroLastIndex(JSContext * cx)2220     bool zeroLastIndex(JSContext* cx) {
2221         if (!regExpIsObject())
2222             return true;
2223 
2224         // Use a fast path for same-global RegExp objects with writable
2225         // lastIndex.
2226         if (obj_->is<RegExpObject>()) {
2227             RegExpObject* nobj = &obj_->as<RegExpObject>();
2228             if (nobj->lookup(cx, cx->names().lastIndex)->writable()) {
2229                 nobj->zeroLastIndex(cx);
2230                 return true;
2231             }
2232         }
2233 
2234         // Handle everything else generically (including throwing if .lastIndex is non-writable).
2235         RootedValue zero(cx, Int32Value(0));
2236         return SetProperty(cx, obj_, cx->names().lastIndex, zero);
2237     }
2238 
regExp()2239     RegExpShared& regExp() { return *re_; }
2240 
regExpIsObject()2241     bool regExpIsObject() { return obj_ != nullptr; }
regExpObject()2242     HandleObject regExpObject() {
2243         MOZ_ASSERT(regExpIsObject());
2244         return obj_;
2245     }
2246 
2247   private:
2248     StringRegExpGuard(const StringRegExpGuard&) = delete;
2249     void operator=(const StringRegExpGuard&) = delete;
2250 };
2251 
2252 } /* anonymous namespace */
2253 
2254 static bool
DoMatchLocal(JSContext * cx,const CallArgs & args,RegExpStatics * res,HandleLinearString input,RegExpShared & re)2255 DoMatchLocal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
2256              RegExpShared& re)
2257 {
2258     ScopedMatchPairs matches(&cx->tempLifoAlloc());
2259     RegExpRunStatus status = re.execute(cx, input, 0, &matches);
2260     if (status == RegExpRunStatus_Error)
2261         return false;
2262 
2263     if (status == RegExpRunStatus_Success_NotFound) {
2264         args.rval().setNull();
2265         return true;
2266     }
2267 
2268     if (!res->updateFromMatchPairs(cx, input, matches))
2269         return false;
2270 
2271     RootedValue rval(cx);
2272     if (!CreateRegExpMatchResult(cx, input, matches, &rval))
2273         return false;
2274 
2275     args.rval().set(rval);
2276     return true;
2277 }
2278 
2279 /* ES5 15.5.4.10 step 8. */
2280 static bool
DoMatchGlobal(JSContext * cx,const CallArgs & args,RegExpStatics * res,HandleLinearString input,StringRegExpGuard & g)2281 DoMatchGlobal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
2282               StringRegExpGuard& g)
2283 {
2284     // Step 8a.
2285     //
2286     // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2287     // rest of String.prototype.match, particularly in steps 8f(i) and
2288     // 8f(iii)(2)(a).  Here's why.
2289     //
2290     // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2291     // whose .global is true and a string.  The only side effect of a call in
2292     // these circumstances is that the RegExp's .lastIndex will be modified to
2293     // the next starting index after the discovered match (or to 0 if there's
2294     // no remaining match).  Because .lastIndex is a non-configurable data
2295     // property and no script-controllable code executes after step 8a, passing
2296     // step 8a implies *every* .lastIndex set succeeds.  String.prototype.match
2297     // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2298     // so the final value of .lastIndex is 0: exactly the state after step 8a
2299     // succeeds.  No spec step lets script observe intermediate .lastIndex
2300     // values.
2301     //
2302     // The arrays returned by RegExp.prototype.exec always have a string at
2303     // index 0, for which [[Get]]s have no side effects.
2304     //
2305     // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2306     //
2307     // This is a tricky point, because after this set, our implementation *can*
2308     // fail.  The key is that script can't distinguish these failure modes from
2309     // one where, in spec terms, we fail immediately after step 8a.  That *in
2310     // reality* we might have done extra matching work, or created a partial
2311     // results array to return, or hit an interrupt, is irrelevant.  The
2312     // script can't tell we did any of those things but didn't update
2313     // .lastIndex.  Thus we can optimize steps 8b onward however we want,
2314     // including eliminating intermediate .lastIndex sets, as long as we don't
2315     // add ways for script to observe the intermediate states.
2316     //
2317     // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2318     // we can't get caught.
2319     if (!g.zeroLastIndex(cx))
2320         return false;
2321 
2322     // Step 8b.
2323     AutoValueVector elements(cx);
2324 
2325     size_t lastSuccessfulStart = 0;
2326 
2327     // The loop variables from steps 8c-e aren't needed, as we use different
2328     // techniques from the spec to implement step 8f's loop.
2329 
2330     // Step 8f.
2331     ScopedMatchPairs matches(&cx->tempLifoAlloc());
2332     size_t charsLen = input->length();
2333     RegExpShared& re = g.regExp();
2334     for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
2335         if (!CheckForInterrupt(cx))
2336             return false;
2337 
2338         // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2339         RegExpRunStatus status = re.execute(cx, input, searchIndex, &matches);
2340         if (status == RegExpRunStatus_Error)
2341             return false;
2342 
2343         // Step 8f(ii).
2344         if (status == RegExpRunStatus_Success_NotFound)
2345             break;
2346 
2347         lastSuccessfulStart = searchIndex;
2348         MatchPair& match = matches[0];
2349 
2350         // Steps 8f(iii)(1-3).
2351         searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2352 
2353         // Step 8f(iii)(4-5).
2354         JSLinearString* str = NewDependentString(cx, input, match.start, match.length());
2355         if (!str)
2356             return false;
2357         if (!elements.append(StringValue(str)))
2358             return false;
2359     }
2360 
2361     // Step 8g.
2362     if (elements.empty()) {
2363         args.rval().setNull();
2364         return true;
2365     }
2366 
2367     // The last *successful* match updates the RegExpStatics. (Interestingly,
2368     // this implies that String.prototype.match's semantics aren't those
2369     // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2370     res->updateLazily(cx, input, &re, lastSuccessfulStart);
2371 
2372     // Steps 8b, 8f(iii)(5-6), 8h.
2373     JSObject* array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
2374     if (!array)
2375         return false;
2376 
2377     args.rval().setObject(*array);
2378     return true;
2379 }
2380 
2381 static bool
BuildFlatMatchArray(JSContext * cx,HandleString textstr,const FlatMatch & fm,CallArgs * args)2382 BuildFlatMatchArray(JSContext* cx, HandleString textstr, const FlatMatch& fm, CallArgs* args)
2383 {
2384     if (fm.match() < 0) {
2385         args->rval().setNull();
2386         return true;
2387     }
2388 
2389     /* Get the templateObject that defines the shape and type of the output object */
2390     JSObject* templateObject = cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
2391     if (!templateObject)
2392         return false;
2393 
2394     RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
2395     if (!arr)
2396         return false;
2397 
2398     /* Store a Value for each pair. */
2399     arr->setDenseInitializedLength(1);
2400     arr->initDenseElement(0, StringValue(fm.pattern()));
2401 
2402     /* Set the |index| property. (TemplateObject positions it in slot 0) */
2403     arr->setSlot(0, Int32Value(fm.match()));
2404 
2405     /* Set the |input| property. (TemplateObject positions it in slot 1) */
2406     arr->setSlot(1, StringValue(textstr));
2407 
2408 #ifdef DEBUG
2409     RootedValue test(cx);
2410     RootedId id(cx, NameToId(cx->names().index));
2411     if (!NativeGetProperty(cx, arr, id, &test))
2412         return false;
2413     MOZ_ASSERT(test == arr->getSlot(0));
2414     id = NameToId(cx->names().input);
2415     if (!NativeGetProperty(cx, arr, id, &test))
2416         return false;
2417     MOZ_ASSERT(test == arr->getSlot(1));
2418 #endif
2419 
2420     args->rval().setObject(*arr);
2421     return true;
2422 }
2423 
2424 /* ES5 15.5.4.10. */
2425 bool
str_match(JSContext * cx,unsigned argc,Value * vp)2426 js::str_match(JSContext* cx, unsigned argc, Value* vp)
2427 {
2428     CallArgs args = CallArgsFromVp(argc, vp);
2429 
2430     /* Steps 1-2. */
2431     RootedString str(cx, ThisToStringForStringProto(cx, args));
2432     if (!str)
2433         return false;
2434 
2435     /* Steps 3-4, plus the trailing-argument "flags" extension. */
2436     StringRegExpGuard g(cx);
2437     if (!g.init(cx, args, true))
2438         return false;
2439 
2440     /* Fast path when the search pattern can be searched for as a string. */
2441     if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length()))
2442         return BuildFlatMatchArray(cx, str, *fm, &args);
2443 
2444     /* Return if there was an error in tryFlatMatch. */
2445     if (cx->isExceptionPending())
2446         return false;
2447 
2448     /* Create regular-expression internals as needed to perform the match. */
2449     if (!g.normalizeRegExp(cx, false, 1, args))
2450         return false;
2451 
2452     RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2453     if (!res)
2454         return false;
2455 
2456     RootedLinearString linearStr(cx, str->ensureLinear(cx));
2457     if (!linearStr)
2458         return false;
2459 
2460     /* Steps 5-6, 7. */
2461     if (!g.regExp().global())
2462         return DoMatchLocal(cx, args, res, linearStr, g.regExp());
2463 
2464     /* Steps 6, 8. */
2465     return DoMatchGlobal(cx, args, res, linearStr, g);
2466 }
2467 
2468 bool
str_search(JSContext * cx,unsigned argc,Value * vp)2469 js::str_search(JSContext* cx, unsigned argc, Value* vp)
2470 {
2471     CallArgs args = CallArgsFromVp(argc, vp);
2472     RootedString str(cx, ThisToStringForStringProto(cx, args));
2473     if (!str)
2474         return false;
2475 
2476     StringRegExpGuard g(cx);
2477     if (!g.init(cx, args, true))
2478         return false;
2479     if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length())) {
2480         args.rval().setInt32(fm->match());
2481         return true;
2482     }
2483 
2484     if (cx->isExceptionPending())  /* from tryFlatMatch */
2485         return false;
2486 
2487     if (!g.normalizeRegExp(cx, false, 1, args))
2488         return false;
2489 
2490     RootedLinearString linearStr(cx, str->ensureLinear(cx));
2491     if (!linearStr)
2492         return false;
2493 
2494     RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2495     if (!res)
2496         return false;
2497 
2498     /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2499     ScopedMatchPairs matches(&cx->tempLifoAlloc());
2500     RegExpRunStatus status = g.regExp().execute(cx, linearStr, 0, &matches);
2501     if (status == RegExpRunStatus_Error)
2502         return false;
2503 
2504     if (status == RegExpRunStatus_Success)
2505         res->updateLazily(cx, linearStr, &g.regExp(), 0);
2506 
2507     args.rval().setInt32(status == RegExpRunStatus_Success_NotFound ? -1 : matches[0].start);
2508     return true;
2509 }
2510 
2511 // Utility for building a rope (lazy concatenation) of strings.
2512 class RopeBuilder {
2513     JSContext* cx;
2514     RootedString res;
2515 
2516     RopeBuilder(const RopeBuilder& other) = delete;
2517     void operator=(const RopeBuilder& other) = delete;
2518 
2519   public:
RopeBuilder(JSContext * cx)2520     explicit RopeBuilder(JSContext* cx)
2521       : cx(cx), res(cx, cx->runtime()->emptyString)
2522     {}
2523 
append(HandleString str)2524     inline bool append(HandleString str) {
2525         res = ConcatStrings<CanGC>(cx, res, str);
2526         return !!res;
2527     }
2528 
result()2529     inline JSString* result() {
2530         return res;
2531     }
2532 };
2533 
2534 namespace {
2535 
2536 template <typename CharT>
2537 static uint32_t
FindDollarIndex(const CharT * chars,size_t length)2538 FindDollarIndex(const CharT* chars, size_t length)
2539 {
2540     if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2541         uint32_t dollarIndex = p - chars;
2542         MOZ_ASSERT(dollarIndex < length);
2543         return dollarIndex;
2544     }
2545     return UINT32_MAX;
2546 }
2547 
2548 struct ReplaceData
2549 {
ReplaceData__anon6cc13cee0311::ReplaceData2550     explicit ReplaceData(JSContext* cx)
2551       : str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
2552         fig(cx, NullValue()), sb(cx)
2553     {}
2554 
setReplacementString__anon6cc13cee0311::ReplaceData2555     inline void setReplacementString(JSLinearString* string) {
2556         MOZ_ASSERT(string);
2557         lambda = nullptr;
2558         elembase = nullptr;
2559         repstr = string;
2560 
2561         AutoCheckCannotGC nogc;
2562         dollarIndex = string->hasLatin1Chars()
2563                       ? FindDollarIndex(string->latin1Chars(nogc), string->length())
2564                       : FindDollarIndex(string->twoByteChars(nogc), string->length());
2565     }
2566 
setReplacementFunction__anon6cc13cee0311::ReplaceData2567     inline void setReplacementFunction(JSObject* func) {
2568         MOZ_ASSERT(func);
2569         lambda = func;
2570         elembase = nullptr;
2571         repstr = nullptr;
2572         dollarIndex = UINT32_MAX;
2573     }
2574 
2575     RootedString       str;            /* 'this' parameter object as a string */
2576     StringRegExpGuard  g;              /* regexp parameter object and private data */
2577     RootedObject       lambda;         /* replacement function object or null */
2578     RootedNativeObject elembase;       /* object for function(a){return b[a]} replace */
2579     RootedLinearString repstr;         /* replacement string */
2580     uint32_t           dollarIndex;    /* index of first $ in repstr, or UINT32_MAX */
2581     int                leftIndex;      /* left context index in str->chars */
2582     bool               calledBack;     /* record whether callback has been called */
2583     FastInvokeGuard    fig;            /* used for lambda calls, also holds arguments */
2584     StringBuffer       sb;             /* buffer built during DoMatch */
2585 };
2586 
2587 } /* anonymous namespace */
2588 
2589 static bool
2590 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata);
2591 
2592 static bool
DoMatchForReplaceLocal(JSContext * cx,RegExpStatics * res,HandleLinearString linearStr,RegExpShared & re,ReplaceData & rdata,size_t * rightContextOffset)2593 DoMatchForReplaceLocal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2594                        RegExpShared& re, ReplaceData& rdata, size_t* rightContextOffset)
2595 {
2596     ScopedMatchPairs matches(&cx->tempLifoAlloc());
2597     RegExpRunStatus status = re.execute(cx, linearStr, 0, &matches);
2598     if (status == RegExpRunStatus_Error)
2599         return false;
2600 
2601     if (status == RegExpRunStatus_Success_NotFound)
2602         return true;
2603 
2604     MatchPair& match = matches[0];
2605     *rightContextOffset = match.limit;
2606 
2607     if (!res->updateFromMatchPairs(cx, linearStr, matches))
2608         return false;
2609 
2610     return ReplaceRegExp(cx, res, rdata);
2611 }
2612 
2613 static bool
DoMatchForReplaceGlobal(JSContext * cx,RegExpStatics * res,HandleLinearString linearStr,RegExpShared & re,ReplaceData & rdata,size_t * rightContextOffset)2614 DoMatchForReplaceGlobal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2615                         RegExpShared& re, ReplaceData& rdata, size_t* rightContextOffset)
2616 {
2617     size_t charsLen = linearStr->length();
2618     ScopedMatchPairs matches(&cx->tempLifoAlloc());
2619     for (size_t count = 0, searchIndex = 0; searchIndex <= charsLen; ++count) {
2620         if (!CheckForInterrupt(cx))
2621             return false;
2622 
2623         RegExpRunStatus status = re.execute(cx, linearStr, searchIndex, &matches);
2624         if (status == RegExpRunStatus_Error)
2625             return false;
2626 
2627         if (status == RegExpRunStatus_Success_NotFound)
2628             break;
2629 
2630         MatchPair& match = matches[0];
2631         searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2632         *rightContextOffset = match.limit;
2633 
2634         if (!res->updateFromMatchPairs(cx, linearStr, matches))
2635             return false;
2636 
2637         if (!ReplaceRegExp(cx, res, rdata))
2638             return false;
2639     }
2640 
2641     return true;
2642 }
2643 
2644 template <typename CharT>
2645 static bool
InterpretDollar(RegExpStatics * res,const CharT * bp,const CharT * dp,const CharT * ep,ReplaceData & rdata,JSSubString * out,size_t * skip)2646 InterpretDollar(RegExpStatics* res, const CharT* bp, const CharT* dp, const CharT* ep,
2647                 ReplaceData& rdata, JSSubString* out, size_t* skip)
2648 {
2649     MOZ_ASSERT(*dp == '$');
2650 
2651     /* If there is only a dollar, bail now */
2652     if (dp + 1 >= ep)
2653         return false;
2654 
2655     /* Interpret all Perl match-induced dollar variables. */
2656     char16_t dc = dp[1];
2657     if (JS7_ISDEC(dc)) {
2658         /* ECMA-262 Edition 3: 1-9 or 01-99 */
2659         unsigned num = JS7_UNDEC(dc);
2660         if (num > res->getMatches().parenCount())
2661             return false;
2662 
2663         const CharT* cp = dp + 2;
2664         if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
2665             unsigned tmp = 10 * num + JS7_UNDEC(dc);
2666             if (tmp <= res->getMatches().parenCount()) {
2667                 cp++;
2668                 num = tmp;
2669             }
2670         }
2671         if (num == 0)
2672             return false;
2673 
2674         *skip = cp - dp;
2675 
2676         MOZ_ASSERT(num <= res->getMatches().parenCount());
2677 
2678         /*
2679          * Note: we index to get the paren with the (1-indexed) pair
2680          * number, as opposed to a (0-indexed) paren number.
2681          */
2682         res->getParen(num, out);
2683         return true;
2684     }
2685 
2686     *skip = 2;
2687     switch (dc) {
2688       case '$':
2689         out->init(rdata.repstr, dp - bp, 1);
2690         return true;
2691       case '&':
2692         res->getLastMatch(out);
2693         return true;
2694       case '+':
2695         res->getLastParen(out);
2696         return true;
2697       case '`':
2698         res->getLeftContext(out);
2699         return true;
2700       case '\'':
2701         res->getRightContext(out);
2702         return true;
2703     }
2704     return false;
2705 }
2706 
2707 template <typename CharT>
2708 static bool
FindReplaceLengthString(JSContext * cx,RegExpStatics * res,ReplaceData & rdata,size_t * sizep)2709 FindReplaceLengthString(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2710 {
2711     JSLinearString* repstr = rdata.repstr;
2712     CheckedInt<uint32_t> replen = repstr->length();
2713 
2714     if (rdata.dollarIndex != UINT32_MAX) {
2715         AutoCheckCannotGC nogc;
2716         MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2717         const CharT* bp = repstr->chars<CharT>(nogc);
2718         const CharT* dp = bp + rdata.dollarIndex;
2719         const CharT* ep = bp + repstr->length();
2720         do {
2721             JSSubString sub;
2722             size_t skip;
2723             if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2724                 if (sub.length > skip)
2725                     replen += sub.length - skip;
2726                 else
2727                     replen -= skip - sub.length;
2728                 dp += skip;
2729             } else {
2730                 dp++;
2731             }
2732 
2733             dp = js_strchr_limit(dp, '$', ep);
2734         } while (dp);
2735     }
2736 
2737     if (!replen.isValid()) {
2738         ReportAllocationOverflow(cx);
2739         return false;
2740     }
2741 
2742     *sizep = replen.value();
2743     return true;
2744 }
2745 
2746 static bool
FindReplaceLength(JSContext * cx,RegExpStatics * res,ReplaceData & rdata,size_t * sizep)2747 FindReplaceLength(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2748 {
2749     if (rdata.elembase) {
2750         /*
2751          * The base object is used when replace was passed a lambda which looks like
2752          * 'function(a) { return b[a]; }' for the base object b.  b will not change
2753          * in the course of the replace unless we end up making a scripted call due
2754          * to accessing a scripted getter or a value with a scripted toString.
2755          */
2756         MOZ_ASSERT(rdata.lambda);
2757         MOZ_ASSERT(!rdata.elembase->getOps()->lookupProperty);
2758         MOZ_ASSERT(!rdata.elembase->getOps()->getProperty);
2759 
2760         RootedValue match(cx);
2761         if (!res->createLastMatch(cx, &match))
2762             return false;
2763         JSAtom* atom = ToAtom<CanGC>(cx, match);
2764         if (!atom)
2765             return false;
2766 
2767         RootedValue v(cx);
2768         if (HasDataProperty(cx, rdata.elembase, AtomToId(atom), v.address()) && v.isString()) {
2769             rdata.repstr = v.toString()->ensureLinear(cx);
2770             if (!rdata.repstr)
2771                 return false;
2772             *sizep = rdata.repstr->length();
2773             return true;
2774         }
2775 
2776         /*
2777          * Couldn't handle this property, fall through and despecialize to the
2778          * general lambda case.
2779          */
2780         rdata.elembase = nullptr;
2781     }
2782 
2783     if (rdata.lambda) {
2784         RootedObject lambda(cx, rdata.lambda);
2785 
2786         /*
2787          * In the lambda case, not only do we find the replacement string's
2788          * length, we compute repstr and return it via rdata for use within
2789          * DoReplace.  The lambda is called with arguments ($&, $1, $2, ...,
2790          * index, input), i.e., all the properties of a regexp match array.
2791          * For $&, etc., we must create string jsvals from cx->regExpStatics.
2792          * We grab up stack space to keep the newborn strings GC-rooted.
2793          */
2794         unsigned p = res->getMatches().parenCount();
2795         unsigned argc = 1 + p + 2;
2796 
2797         InvokeArgs& args = rdata.fig.args();
2798         if (!args.init(cx, argc))
2799             return false;
2800 
2801         args.setCallee(ObjectValue(*lambda));
2802         args.setThis(UndefinedValue());
2803 
2804         /* Push $&, $1, $2, ... */
2805         unsigned argi = 0;
2806         if (!res->createLastMatch(cx, args[argi++]))
2807             return false;
2808 
2809         for (size_t i = 0; i < res->getMatches().parenCount(); ++i) {
2810             if (!res->createParen(cx, i + 1, args[argi++]))
2811                 return false;
2812         }
2813 
2814         /* Push match index and input string. */
2815         args[argi++].setInt32(res->getMatches()[0].start);
2816         args[argi].setString(rdata.str);
2817 
2818         if (!rdata.fig.invoke(cx))
2819             return false;
2820 
2821         /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2822         JSString* repstr = ToString<CanGC>(cx, args.rval());
2823         if (!repstr)
2824             return false;
2825         rdata.repstr = repstr->ensureLinear(cx);
2826         if (!rdata.repstr)
2827             return false;
2828         *sizep = rdata.repstr->length();
2829         return true;
2830     }
2831 
2832     return rdata.repstr->hasLatin1Chars()
2833            ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
2834            : FindReplaceLengthString<char16_t>(cx, res, rdata, sizep);
2835 }
2836 
2837 /*
2838  * Precondition: |rdata.sb| already has necessary growth space reserved (as
2839  * derived from FindReplaceLength), and has been inflated to TwoByte if
2840  * necessary.
2841  */
2842 template <typename CharT>
2843 static void
DoReplace(RegExpStatics * res,ReplaceData & rdata)2844 DoReplace(RegExpStatics* res, ReplaceData& rdata)
2845 {
2846     AutoCheckCannotGC nogc;
2847     JSLinearString* repstr = rdata.repstr;
2848     const CharT* bp = repstr->chars<CharT>(nogc);
2849     const CharT* cp = bp;
2850 
2851     if (rdata.dollarIndex != UINT32_MAX) {
2852         MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2853         const CharT* dp = bp + rdata.dollarIndex;
2854         const CharT* ep = bp + repstr->length();
2855         do {
2856             /* Move one of the constant portions of the replacement value. */
2857             size_t len = dp - cp;
2858             rdata.sb.infallibleAppend(cp, len);
2859             cp = dp;
2860 
2861             JSSubString sub;
2862             size_t skip;
2863             if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2864                 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
2865                 cp += skip;
2866                 dp += skip;
2867             } else {
2868                 dp++;
2869             }
2870 
2871             dp = js_strchr_limit(dp, '$', ep);
2872         } while (dp);
2873     }
2874     rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
2875 }
2876 
2877 static bool
ReplaceRegExp(JSContext * cx,RegExpStatics * res,ReplaceData & rdata)2878 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata)
2879 {
2880 
2881     const MatchPair& match = res->getMatches()[0];
2882     MOZ_ASSERT(!match.isUndefined());
2883     MOZ_ASSERT(match.limit >= match.start && match.limit >= 0);
2884 
2885     rdata.calledBack = true;
2886     size_t leftoff = rdata.leftIndex;
2887     size_t leftlen = match.start - leftoff;
2888     rdata.leftIndex = match.limit;
2889 
2890     size_t replen = 0;  /* silence 'unused' warning */
2891     if (!FindReplaceLength(cx, res, rdata, &replen))
2892         return false;
2893 
2894     CheckedInt<uint32_t> newlen(rdata.sb.length());
2895     newlen += leftlen;
2896     newlen += replen;
2897     if (!newlen.isValid()) {
2898         ReportAllocationOverflow(cx);
2899         return false;
2900     }
2901 
2902     /*
2903      * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2904      * inflation later on.
2905      */
2906     JSLinearString& str = rdata.str->asLinear();  /* flattened for regexp */
2907     if (str.hasTwoByteChars() || rdata.repstr->hasTwoByteChars()) {
2908         if (!rdata.sb.ensureTwoByteChars())
2909             return false;
2910     }
2911 
2912     if (!rdata.sb.reserve(newlen.value()))
2913         return false;
2914 
2915     /* Append skipped-over portion of the search value. */
2916     rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
2917 
2918     if (rdata.repstr->hasLatin1Chars())
2919         DoReplace<Latin1Char>(res, rdata);
2920     else
2921         DoReplace<char16_t>(res, rdata);
2922     return true;
2923 }
2924 
2925 static JSString*
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleString repstr,const FlatMatch & fm)2926 BuildFlatReplacement(JSContext* cx, HandleString textstr, HandleString repstr,
2927                      const FlatMatch& fm)
2928 {
2929     RopeBuilder builder(cx);
2930     size_t match = fm.match();
2931     size_t matchEnd = match + fm.patternLength();
2932 
2933     if (textstr->isRope()) {
2934         /*
2935          * If we are replacing over a rope, avoid flattening it by iterating
2936          * through it, building a new rope.
2937          */
2938         StringSegmentRange r(cx);
2939         if (!r.init(textstr))
2940             return nullptr;
2941 
2942         size_t pos = 0;
2943         while (!r.empty()) {
2944             RootedString str(cx, r.front());
2945             size_t len = str->length();
2946             size_t strEnd = pos + len;
2947             if (pos < matchEnd && strEnd > match) {
2948                 /*
2949                  * We need to special-case any part of the rope that overlaps
2950                  * with the replacement string.
2951                  */
2952                 if (match >= pos) {
2953                     /*
2954                      * If this part of the rope overlaps with the left side of
2955                      * the pattern, then it must be the only one to overlap with
2956                      * the first character in the pattern, so we include the
2957                      * replacement string here.
2958                      */
2959                     RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2960                     if (!leftSide ||
2961                         !builder.append(leftSide) ||
2962                         !builder.append(repstr))
2963                     {
2964                         return nullptr;
2965                     }
2966                 }
2967 
2968                 /*
2969                  * If str runs off the end of the matched string, append the
2970                  * last part of str.
2971                  */
2972                 if (strEnd > matchEnd) {
2973                     RootedString rightSide(cx, NewDependentString(cx, str, matchEnd - pos,
2974                                                                   strEnd - matchEnd));
2975                     if (!rightSide || !builder.append(rightSide))
2976                         return nullptr;
2977                 }
2978             } else {
2979                 if (!builder.append(str))
2980                     return nullptr;
2981             }
2982             pos += str->length();
2983             if (!r.popFront())
2984                 return nullptr;
2985         }
2986     } else {
2987         RootedString leftSide(cx, NewDependentString(cx, textstr, 0, match));
2988         if (!leftSide)
2989             return nullptr;
2990         RootedString rightSide(cx);
2991         rightSide = NewDependentString(cx, textstr, match + fm.patternLength(),
2992                                        textstr->length() - match - fm.patternLength());
2993         if (!rightSide ||
2994             !builder.append(leftSide) ||
2995             !builder.append(repstr) ||
2996             !builder.append(rightSide))
2997         {
2998             return nullptr;
2999         }
3000     }
3001 
3002     return builder.result();
3003 }
3004 
3005 template <typename CharT>
3006 static bool
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,const FlatMatch & fm,JSLinearString * text,const CharT * repChars,size_t repLength)3007 AppendDollarReplacement(StringBuffer& newReplaceChars, size_t firstDollarIndex,
3008                         const FlatMatch& fm, JSLinearString* text,
3009                         const CharT* repChars, size_t repLength)
3010 {
3011     MOZ_ASSERT(firstDollarIndex < repLength);
3012 
3013     size_t matchStart = fm.match();
3014     size_t matchLimit = matchStart + fm.patternLength();
3015 
3016     /* Move the pre-dollar chunk in bulk. */
3017     newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
3018 
3019     /* Move the rest char-by-char, interpreting dollars as we encounter them. */
3020     const CharT* repLimit = repChars + repLength;
3021     for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
3022         if (*it != '$' || it == repLimit - 1) {
3023             if (!newReplaceChars.append(*it))
3024                 return false;
3025             continue;
3026         }
3027 
3028         switch (*(it + 1)) {
3029           case '$': /* Eat one of the dollars. */
3030             if (!newReplaceChars.append(*it))
3031                 return false;
3032             break;
3033           case '&':
3034             if (!newReplaceChars.appendSubstring(text, matchStart, matchLimit - matchStart))
3035                 return false;
3036             break;
3037           case '`':
3038             if (!newReplaceChars.appendSubstring(text, 0, matchStart))
3039                 return false;
3040             break;
3041           case '\'':
3042             if (!newReplaceChars.appendSubstring(text, matchLimit, text->length() - matchLimit))
3043                 return false;
3044             break;
3045           default: /* The dollar we saw was not special (no matter what its mother told it). */
3046             if (!newReplaceChars.append(*it))
3047                 return false;
3048             continue;
3049         }
3050         ++it; /* We always eat an extra char in the above switch. */
3051     }
3052 
3053     return true;
3054 }
3055 
3056 /*
3057  * Perform a linear-scan dollar substitution on the replacement text,
3058  * constructing a result string that looks like:
3059  *
3060  *      newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
3061  */
3062 static JSString*
BuildDollarReplacement(JSContext * cx,JSString * textstrArg,JSLinearString * repstr,uint32_t firstDollarIndex,const FlatMatch & fm)3063 BuildDollarReplacement(JSContext* cx, JSString* textstrArg, JSLinearString* repstr,
3064                        uint32_t firstDollarIndex, const FlatMatch& fm)
3065 {
3066     RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
3067     if (!textstr)
3068         return nullptr;
3069 
3070     size_t matchStart = fm.match();
3071     size_t matchLimit = matchStart + fm.patternLength();
3072 
3073     /*
3074      * Most probably:
3075      *
3076      *      len(newstr) >= len(orig) - len(match) + len(replacement)
3077      *
3078      * Note that dollar vars _could_ make the resulting text smaller than this.
3079      */
3080     StringBuffer newReplaceChars(cx);
3081     if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
3082         return nullptr;
3083 
3084     if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
3085         return nullptr;
3086 
3087     bool res;
3088     if (repstr->hasLatin1Chars()) {
3089         AutoCheckCannotGC nogc;
3090         res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3091                                       repstr->latin1Chars(nogc), repstr->length());
3092     } else {
3093         AutoCheckCannotGC nogc;
3094         res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3095                                       repstr->twoByteChars(nogc), repstr->length());
3096     }
3097     if (!res)
3098         return nullptr;
3099 
3100     RootedString leftSide(cx, NewDependentString(cx, textstr, 0, matchStart));
3101     if (!leftSide)
3102         return nullptr;
3103 
3104     RootedString newReplace(cx, newReplaceChars.finishString());
3105     if (!newReplace)
3106         return nullptr;
3107 
3108     MOZ_ASSERT(textstr->length() >= matchLimit);
3109     RootedString rightSide(cx, NewDependentString(cx, textstr, matchLimit,
3110                                                   textstr->length() - matchLimit));
3111     if (!rightSide)
3112         return nullptr;
3113 
3114     RopeBuilder builder(cx);
3115     if (!builder.append(leftSide) || !builder.append(newReplace) || !builder.append(rightSide))
3116         return nullptr;
3117 
3118     return builder.result();
3119 }
3120 
3121 struct StringRange
3122 {
3123     size_t start;
3124     size_t length;
3125 
StringRangeStringRange3126     StringRange(size_t s, size_t l)
3127       : start(s), length(l)
3128     { }
3129 };
3130 
3131 template <typename CharT>
3132 static void
CopySubstringsToFatInline(JSFatInlineString * dest,const CharT * src,const StringRange * ranges,size_t rangesLen,size_t outputLen)3133 CopySubstringsToFatInline(JSFatInlineString* dest, const CharT* src, const StringRange* ranges,
3134                           size_t rangesLen, size_t outputLen)
3135 {
3136     CharT* buf = dest->init<CharT>(outputLen);
3137     size_t pos = 0;
3138     for (size_t i = 0; i < rangesLen; i++) {
3139         PodCopy(buf + pos, src + ranges[i].start, ranges[i].length);
3140         pos += ranges[i].length;
3141     }
3142 
3143     MOZ_ASSERT(pos == outputLen);
3144     buf[outputLen] = 0;
3145 }
3146 
3147 static inline JSFatInlineString*
FlattenSubstrings(JSContext * cx,HandleLinearString str,const StringRange * ranges,size_t rangesLen,size_t outputLen)3148 FlattenSubstrings(JSContext* cx, HandleLinearString str, const StringRange* ranges,
3149                   size_t rangesLen, size_t outputLen)
3150 {
3151     JSFatInlineString* result = Allocate<JSFatInlineString>(cx);
3152     if (!result)
3153         return nullptr;
3154 
3155     AutoCheckCannotGC nogc;
3156     if (str->hasLatin1Chars())
3157         CopySubstringsToFatInline(result, str->latin1Chars(nogc), ranges, rangesLen, outputLen);
3158     else
3159         CopySubstringsToFatInline(result, str->twoByteChars(nogc), ranges, rangesLen, outputLen);
3160     return result;
3161 }
3162 
3163 static JSString*
AppendSubstrings(JSContext * cx,HandleLinearString str,const StringRange * ranges,size_t rangesLen)3164 AppendSubstrings(JSContext* cx, HandleLinearString str, const StringRange* ranges,
3165                  size_t rangesLen)
3166 {
3167     MOZ_ASSERT(rangesLen);
3168 
3169     /* For single substrings, construct a dependent string. */
3170     if (rangesLen == 1)
3171         return NewDependentString(cx, str, ranges[0].start, ranges[0].length);
3172 
3173     bool isLatin1 = str->hasLatin1Chars();
3174     uint32_t fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
3175     if (isLatin1)
3176         fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_LATIN1;
3177 
3178     /* Collect substrings into a rope */
3179     size_t i = 0;
3180     RopeBuilder rope(cx);
3181     RootedString part(cx, nullptr);
3182     while (i < rangesLen) {
3183 
3184         /* Find maximum range that fits in JSFatInlineString */
3185         size_t substrLen = 0;
3186         size_t end = i;
3187         for (; end < rangesLen; end++) {
3188             if (substrLen + ranges[end].length > fatInlineMaxLength)
3189                 break;
3190             substrLen += ranges[end].length;
3191         }
3192 
3193         if (i == end) {
3194             /* Not even one range fits JSFatInlineString, use DependentString */
3195             const StringRange& sr = ranges[i++];
3196             part = NewDependentString(cx, str, sr.start, sr.length);
3197         } else {
3198             /* Copy the ranges (linearly) into a JSFatInlineString */
3199             part = FlattenSubstrings(cx, str, ranges + i, end - i, substrLen);
3200             i = end;
3201         }
3202 
3203         if (!part)
3204             return nullptr;
3205 
3206         /* Appending to the rope permanently roots the substring. */
3207         if (!rope.append(part))
3208             return nullptr;
3209     }
3210 
3211     return rope.result();
3212 }
3213 
3214 static JSString*
StrReplaceRegexpRemove(JSContext * cx,HandleString str,RegExpShared & re)3215 StrReplaceRegexpRemove(JSContext* cx, HandleString str, RegExpShared& re)
3216 {
3217     RootedLinearString linearStr(cx, str->ensureLinear(cx));
3218     if (!linearStr)
3219         return nullptr;
3220 
3221     Vector<StringRange, 16, SystemAllocPolicy> ranges;
3222 
3223     size_t charsLen = linearStr->length();
3224 
3225     ScopedMatchPairs matches(&cx->tempLifoAlloc());
3226     size_t startIndex = 0; /* Index used for iterating through the string. */
3227     size_t lastIndex = 0;  /* Index after last successful match. */
3228     size_t lazyIndex = 0;  /* Index before last successful match. */
3229 
3230     /* Accumulate StringRanges for unmatched substrings. */
3231     while (startIndex <= charsLen) {
3232         if (!CheckForInterrupt(cx))
3233             return nullptr;
3234 
3235         RegExpRunStatus status = re.execute(cx, linearStr, startIndex, &matches);
3236         if (status == RegExpRunStatus_Error)
3237             return nullptr;
3238         if (status == RegExpRunStatus_Success_NotFound)
3239             break;
3240         MatchPair& match = matches[0];
3241 
3242         /* Include the latest unmatched substring. */
3243         if (size_t(match.start) > lastIndex) {
3244             if (!ranges.append(StringRange(lastIndex, match.start - lastIndex)))
3245                 return nullptr;
3246         }
3247 
3248         lazyIndex = lastIndex;
3249         lastIndex = match.limit;
3250 
3251         startIndex = match.isEmpty() ? match.limit + 1 : match.limit;
3252 
3253         /* Non-global removal executes at most once. */
3254         if (!re.global())
3255             break;
3256     }
3257 
3258     RegExpStatics* res;
3259 
3260     /* If unmatched, return the input string. */
3261     if (!lastIndex) {
3262         if (startIndex > 0) {
3263             res = cx->global()->getRegExpStatics(cx);
3264             if (!res)
3265                 return nullptr;
3266             res->updateLazily(cx, linearStr, &re, lazyIndex);
3267         }
3268 
3269         return str;
3270     }
3271 
3272     /* The last successful match updates the RegExpStatics. */
3273     res = cx->global()->getRegExpStatics(cx);
3274     if (!res)
3275         return nullptr;
3276 
3277     res->updateLazily(cx, linearStr, &re, lazyIndex);
3278 
3279     /* Include any remaining part of the string. */
3280     if (lastIndex < charsLen) {
3281         if (!ranges.append(StringRange(lastIndex, charsLen - lastIndex)))
3282             return nullptr;
3283     }
3284 
3285     /* Handle the empty string before calling .begin(). */
3286     if (ranges.empty())
3287         return cx->runtime()->emptyString;
3288 
3289     return AppendSubstrings(cx, linearStr, ranges.begin(), ranges.length());
3290 }
3291 
3292 static inline JSString*
StrReplaceRegExp(JSContext * cx,ReplaceData & rdata)3293 StrReplaceRegExp(JSContext* cx, ReplaceData& rdata)
3294 {
3295     rdata.leftIndex = 0;
3296     rdata.calledBack = false;
3297 
3298     RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3299     if (!res)
3300         return nullptr;
3301 
3302     RegExpShared& re = rdata.g.regExp();
3303 
3304     // The spec doesn't describe this function very clearly, so we go ahead and
3305     // assume that when the input to String.prototype.replace is a global
3306     // RegExp, calling the replacer function (assuming one was provided) takes
3307     // place only after the matching is done. See the comment at the beginning
3308     // of DoMatchGlobal explaining why we can zero the the RegExp object's
3309     // lastIndex property here.
3310     if (re.global() && !rdata.g.zeroLastIndex(cx))
3311         return nullptr;
3312 
3313     /* Optimize removal. */
3314     if (rdata.repstr && rdata.repstr->length() == 0) {
3315         MOZ_ASSERT(!rdata.lambda && !rdata.elembase && rdata.dollarIndex == UINT32_MAX);
3316         return StrReplaceRegexpRemove(cx, rdata.str, re);
3317     }
3318 
3319     RootedLinearString linearStr(cx, rdata.str->ensureLinear(cx));
3320     if (!linearStr)
3321         return nullptr;
3322 
3323     size_t rightContextOffset = 0;
3324     if (re.global()) {
3325         if (!DoMatchForReplaceGlobal(cx, res, linearStr, re, rdata, &rightContextOffset))
3326             return nullptr;
3327     } else {
3328         if (!DoMatchForReplaceLocal(cx, res, linearStr, re, rdata, &rightContextOffset))
3329             return nullptr;
3330     }
3331 
3332     if (!rdata.calledBack) {
3333         /* Didn't match, so the string is unmodified. */
3334         return rdata.str;
3335     }
3336 
3337     MOZ_ASSERT(rightContextOffset <= rdata.str->length());
3338     size_t length = rdata.str->length() - rightContextOffset;
3339     if (!rdata.sb.appendSubstring(rdata.str, rightContextOffset, length))
3340         return nullptr;
3341 
3342     return rdata.sb.finishString();
3343 }
3344 
3345 static inline bool
str_replace_regexp(JSContext * cx,const CallArgs & args,ReplaceData & rdata)3346 str_replace_regexp(JSContext* cx, const CallArgs& args, ReplaceData& rdata)
3347 {
3348     if (!rdata.g.normalizeRegExp(cx, true, 2, args))
3349         return false;
3350 
3351     JSString* res = StrReplaceRegExp(cx, rdata);
3352     if (!res)
3353         return false;
3354 
3355     args.rval().setString(res);
3356     return true;
3357 }
3358 
3359 JSString*
str_replace_regexp_raw(JSContext * cx,HandleString string,Handle<RegExpObject * > regexp,HandleString replacement)3360 js::str_replace_regexp_raw(JSContext* cx, HandleString string, Handle<RegExpObject*> regexp,
3361                            HandleString replacement)
3362 {
3363     /* Optimize removal, so we don't have to create ReplaceData */
3364     if (replacement->length() == 0) {
3365         StringRegExpGuard guard(cx);
3366         if (!guard.initRegExp(cx, regexp))
3367             return nullptr;
3368 
3369         RegExpShared& re = guard.regExp();
3370         return StrReplaceRegexpRemove(cx, string, re);
3371     }
3372 
3373     ReplaceData rdata(cx);
3374     rdata.str = string;
3375 
3376     JSLinearString* repl = replacement->ensureLinear(cx);
3377     if (!repl)
3378         return nullptr;
3379 
3380     rdata.setReplacementString(repl);
3381 
3382     if (!rdata.g.initRegExp(cx, regexp))
3383         return nullptr;
3384 
3385     return StrReplaceRegExp(cx, rdata);
3386 }
3387 
3388 static JSString*
StrReplaceString(JSContext * cx,ReplaceData & rdata,const FlatMatch & fm)3389 StrReplaceString(JSContext* cx, ReplaceData& rdata, const FlatMatch& fm)
3390 {
3391     /*
3392      * Note: we could optimize the text.length == pattern.length case if we wanted,
3393      * even in the presence of dollar metachars.
3394      */
3395     if (rdata.dollarIndex != UINT32_MAX)
3396         return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollarIndex, fm);
3397     return BuildFlatReplacement(cx, rdata.str, rdata.repstr, fm);
3398 }
3399 
3400 static const uint32_t ReplaceOptArg = 2;
3401 
3402 JSString*
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)3403 js::str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
3404                           HandleString replacement)
3405 {
3406     ReplaceData rdata(cx);
3407 
3408     rdata.str = string;
3409     JSLinearString* repl = replacement->ensureLinear(cx);
3410     if (!repl)
3411         return nullptr;
3412     rdata.setReplacementString(repl);
3413 
3414     if (!rdata.g.init(cx, pattern))
3415         return nullptr;
3416     const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, ReplaceOptArg, false);
3417 
3418     if (fm->match() < 0)
3419         return string;
3420 
3421     return StrReplaceString(cx, rdata, *fm);
3422 }
3423 
3424 static inline bool
str_replace_flat_lambda(JSContext * cx,const CallArgs & outerArgs,ReplaceData & rdata,const FlatMatch & fm)3425 str_replace_flat_lambda(JSContext* cx, const CallArgs& outerArgs, ReplaceData& rdata,
3426                         const FlatMatch& fm)
3427 {
3428     RootedString matchStr(cx, NewDependentString(cx, rdata.str, fm.match(), fm.patternLength()));
3429     if (!matchStr)
3430         return false;
3431 
3432     /* lambda(matchStr, matchStart, textstr) */
3433     static const uint32_t lambdaArgc = 3;
3434     if (!rdata.fig.args().init(cx, lambdaArgc))
3435         return false;
3436 
3437     CallArgs& args = rdata.fig.args();
3438     args.setCallee(ObjectValue(*rdata.lambda));
3439     args.setThis(UndefinedValue());
3440 
3441     Value* sp = args.array();
3442     sp[0].setString(matchStr);
3443     sp[1].setInt32(fm.match());
3444     sp[2].setString(rdata.str);
3445 
3446     if (!rdata.fig.invoke(cx))
3447         return false;
3448 
3449     RootedString repstr(cx, ToString<CanGC>(cx, args.rval()));
3450     if (!repstr)
3451         return false;
3452 
3453     RootedString leftSide(cx, NewDependentString(cx, rdata.str, 0, fm.match()));
3454     if (!leftSide)
3455         return false;
3456 
3457     size_t matchLimit = fm.match() + fm.patternLength();
3458     RootedString rightSide(cx, NewDependentString(cx, rdata.str, matchLimit,
3459                                                   rdata.str->length() - matchLimit));
3460     if (!rightSide)
3461         return false;
3462 
3463     RopeBuilder builder(cx);
3464     if (!(builder.append(leftSide) &&
3465           builder.append(repstr) &&
3466           builder.append(rightSide))) {
3467         return false;
3468     }
3469 
3470     outerArgs.rval().setString(builder.result());
3471     return true;
3472 }
3473 
3474 /*
3475  * Pattern match the script to check if it is is indexing into a particular
3476  * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3477  * such cases, which are used by javascript packers (particularly the popular
3478  * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3479  * code patterns generated by such packers here.
3480  */
3481 static bool
LambdaIsGetElem(JSContext * cx,JSObject & lambda,MutableHandleNativeObject pobj)3482 LambdaIsGetElem(JSContext* cx, JSObject& lambda, MutableHandleNativeObject pobj)
3483 {
3484     if (!lambda.is<JSFunction>())
3485         return true;
3486 
3487     RootedFunction fun(cx, &lambda.as<JSFunction>());
3488     if (!fun->isInterpreted() || fun->isClassConstructor())
3489         return true;
3490 
3491     JSScript* script = fun->getOrCreateScript(cx);
3492     if (!script)
3493         return false;
3494 
3495     jsbytecode* pc = script->code();
3496 
3497     /*
3498      * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3499      * Rule out the (unlikely) possibility of a function with a call object
3500      * since it would make our scope walk off by 1.
3501      */
3502     if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->needsCallObject())
3503         return true;
3504     ScopeCoordinate sc(pc);
3505     ScopeObject* scope = &fun->environment()->as<ScopeObject>();
3506     for (unsigned i = 0; i < sc.hops(); ++i)
3507         scope = &scope->enclosingScope().as<ScopeObject>();
3508     Value b = scope->aliasedVar(sc);
3509     pc += JSOP_GETALIASEDVAR_LENGTH;
3510 
3511     /* Look for 'a' to be the lambda's first argument. */
3512     if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0)
3513         return true;
3514     pc += JSOP_GETARG_LENGTH;
3515 
3516     /* 'b[a]' */
3517     if (JSOp(*pc) != JSOP_GETELEM)
3518         return true;
3519     pc += JSOP_GETELEM_LENGTH;
3520 
3521     /* 'return b[a]' */
3522     if (JSOp(*pc) != JSOP_RETURN)
3523         return true;
3524 
3525     /* 'b' must behave like a normal object. */
3526     if (!b.isObject())
3527         return true;
3528 
3529     JSObject& bobj = b.toObject();
3530     const Class* clasp = bobj.getClass();
3531     if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty)
3532         return true;
3533 
3534     pobj.set(&bobj.as<NativeObject>());
3535     return true;
3536 }
3537 
3538 bool
str_replace(JSContext * cx,unsigned argc,Value * vp)3539 js::str_replace(JSContext* cx, unsigned argc, Value* vp)
3540 {
3541     CallArgs args = CallArgsFromVp(argc, vp);
3542 
3543     ReplaceData rdata(cx);
3544     rdata.str = ThisToStringForStringProto(cx, args);
3545     if (!rdata.str)
3546         return false;
3547 
3548     if (!rdata.g.init(cx, args))
3549         return false;
3550 
3551     /* Extract replacement string/function. */
3552     if (args.length() >= ReplaceOptArg && IsCallable(args[1])) {
3553         rdata.setReplacementFunction(&args[1].toObject());
3554 
3555         if (!LambdaIsGetElem(cx, *rdata.lambda, &rdata.elembase))
3556             return false;
3557     } else {
3558         JSLinearString* string = ArgToRootedString(cx, args, 1);
3559         if (!string)
3560             return false;
3561 
3562         rdata.setReplacementString(string);
3563     }
3564 
3565     rdata.fig.initFunction(ObjectOrNullValue(rdata.lambda));
3566 
3567     /*
3568      * Unlike its |String.prototype| brethren, |replace| doesn't convert
3569      * its input to a regular expression. (Even if it contains metachars.)
3570      *
3571      * However, if the user invokes our (non-standard) |flags| argument
3572      * extension then we revert to creating a regular expression. Note that
3573      * this is observable behavior through the side-effect mutation of the
3574      * |RegExp| statics.
3575      */
3576 
3577     const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
3578 
3579     if (!fm) {
3580         if (cx->isExceptionPending())  /* oom in RopeMatch in tryFlatMatch */
3581             return false;
3582         return str_replace_regexp(cx, args, rdata);
3583     }
3584 
3585     if (fm->match() < 0) {
3586         args.rval().setString(rdata.str);
3587         return true;
3588     }
3589 
3590     if (rdata.lambda)
3591         return str_replace_flat_lambda(cx, args, rdata, *fm);
3592 
3593     JSString* res = StrReplaceString(cx, rdata, *fm);
3594     if (!res)
3595         return false;
3596 
3597     args.rval().setString(res);
3598     return true;
3599 }
3600 
3601 namespace {
3602 
3603 class SplitMatchResult {
3604     size_t endIndex_;
3605     size_t length_;
3606 
3607   public:
setFailure()3608     void setFailure() {
3609         JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
3610         endIndex_ = SIZE_MAX;
3611     }
isFailure() const3612     bool isFailure() const {
3613         return endIndex_ == SIZE_MAX;
3614     }
endIndex() const3615     size_t endIndex() const {
3616         MOZ_ASSERT(!isFailure());
3617         return endIndex_;
3618     }
length() const3619     size_t length() const {
3620         MOZ_ASSERT(!isFailure());
3621         return length_;
3622     }
setResult(size_t length,size_t endIndex)3623     void setResult(size_t length, size_t endIndex) {
3624         length_ = length;
3625         endIndex_ = endIndex;
3626     }
3627 };
3628 
3629 } /* anonymous namespace */
3630 
3631 template<class Matcher>
3632 static JSObject*
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,const Matcher & splitMatch,HandleObjectGroup group)3633 SplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, const Matcher& splitMatch,
3634             HandleObjectGroup group)
3635 {
3636     size_t strLength = str->length();
3637     SplitMatchResult result;
3638 
3639     /* Step 11. */
3640     if (strLength == 0) {
3641         if (!splitMatch(cx, str, 0, &result))
3642             return nullptr;
3643 
3644         /*
3645          * NB: Unlike in the non-empty string case, it's perfectly fine
3646          *     (indeed the spec requires it) if we match at the end of the
3647          *     string.  Thus these cases should hold:
3648          *
3649          *   var a = "".split("");
3650          *   assertEq(a.length, 0);
3651          *   var b = "".split(/.?/);
3652          *   assertEq(b.length, 0);
3653          */
3654         if (!result.isFailure())
3655             return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3656 
3657         RootedValue v(cx, StringValue(str));
3658         return NewCopiedArrayTryUseGroup(cx, group, v.address(), 1);
3659     }
3660 
3661     /* Step 12. */
3662     size_t lastEndIndex = 0;
3663     size_t index = 0;
3664 
3665     /* Step 13. */
3666     AutoValueVector splits(cx);
3667 
3668     while (index < strLength) {
3669         /* Step 13(a). */
3670         if (!splitMatch(cx, str, index, &result))
3671             return nullptr;
3672 
3673         /*
3674          * Step 13(b).
3675          *
3676          * Our match algorithm differs from the spec in that it returns the
3677          * next index at which a match happens.  If no match happens we're
3678          * done.
3679          *
3680          * But what if the match is at the end of the string (and the string is
3681          * not empty)?  Per 13(c)(ii) this shouldn't be a match, so we have to
3682          * specially exclude it.  Thus this case should hold:
3683          *
3684          *   var a = "abc".split(/\b/);
3685          *   assertEq(a.length, 1);
3686          *   assertEq(a[0], "abc");
3687          */
3688         if (result.isFailure())
3689             break;
3690 
3691         /* Step 13(c)(i). */
3692         size_t sepLength = result.length();
3693         size_t endIndex = result.endIndex();
3694         if (sepLength == 0 && endIndex == strLength)
3695             break;
3696 
3697         /* Step 13(c)(ii). */
3698         if (endIndex == lastEndIndex) {
3699             index++;
3700             continue;
3701         }
3702 
3703         /* Step 13(c)(iii). */
3704         MOZ_ASSERT(lastEndIndex < endIndex);
3705         MOZ_ASSERT(sepLength <= strLength);
3706         MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3707 
3708         /* Steps 13(c)(iii)(1-3). */
3709         size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3710         JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3711         if (!sub || !splits.append(StringValue(sub)))
3712             return nullptr;
3713 
3714         /* Step 13(c)(iii)(4). */
3715         if (splits.length() == limit)
3716             return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3717 
3718         /* Step 13(c)(iii)(5). */
3719         lastEndIndex = endIndex;
3720 
3721         /* Step 13(c)(iii)(6-7). */
3722         if (Matcher::returnsCaptures) {
3723             RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3724             if (!res)
3725                 return nullptr;
3726 
3727             const MatchPairs& matches = res->getMatches();
3728             for (size_t i = 0; i < matches.parenCount(); i++) {
3729                 /* Steps 13(c)(iii)(7)(a-c). */
3730                 if (!matches[i + 1].isUndefined()) {
3731                     JSSubString parsub;
3732                     res->getParen(i + 1, &parsub);
3733                     sub = NewDependentString(cx, parsub.base, parsub.offset, parsub.length);
3734                     if (!sub || !splits.append(StringValue(sub)))
3735                         return nullptr;
3736                 } else {
3737                     if (!splits.append(UndefinedValue()))
3738                         return nullptr;
3739                 }
3740 
3741                 /* Step 13(c)(iii)(7)(d). */
3742                 if (splits.length() == limit)
3743                     return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3744             }
3745         }
3746 
3747         /* Step 13(c)(iii)(8). */
3748         index = lastEndIndex;
3749     }
3750 
3751     /* Steps 14-15. */
3752     JSString* sub = NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3753     if (!sub || !splits.append(StringValue(sub)))
3754         return nullptr;
3755 
3756     /* Step 16. */
3757     return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3758 }
3759 
3760 // Fast-path for splitting a string into a character array via split("").
3761 static JSObject*
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleObjectGroup group)3762 CharSplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, HandleObjectGroup group)
3763 {
3764     size_t strLength = str->length();
3765     if (strLength == 0)
3766         return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3767 
3768     js::StaticStrings& staticStrings = cx->staticStrings();
3769     uint32_t resultlen = (limit < strLength ? limit : strLength);
3770 
3771     AutoValueVector splits(cx);
3772     if (!splits.reserve(resultlen))
3773         return nullptr;
3774 
3775     for (size_t i = 0; i < resultlen; ++i) {
3776         JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3777         if (!sub)
3778             return nullptr;
3779         splits.infallibleAppend(StringValue(sub));
3780     }
3781 
3782     return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3783 }
3784 
3785 namespace {
3786 
3787 /*
3788  * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3789  * paths for regular expression and string separators.
3790  *
3791  * The algorithm differs from the spec in that the we return the next index at
3792  * which a match happens.
3793  */
3794 class SplitRegExpMatcher
3795 {
3796     RegExpShared& re;
3797     RegExpStatics* res;
3798 
3799   public:
SplitRegExpMatcher(RegExpShared & re,RegExpStatics * res)3800     SplitRegExpMatcher(RegExpShared& re, RegExpStatics* res) : re(re), res(res) {}
3801 
3802     static const bool returnsCaptures = true;
3803 
operator ()(JSContext * cx,HandleLinearString str,size_t index,SplitMatchResult * result) const3804     bool operator()(JSContext* cx, HandleLinearString str, size_t index,
3805                     SplitMatchResult* result) const
3806     {
3807         ScopedMatchPairs matches(&cx->tempLifoAlloc());
3808         RegExpRunStatus status = re.execute(cx, str, index, &matches);
3809         if (status == RegExpRunStatus_Error)
3810             return false;
3811 
3812         if (status == RegExpRunStatus_Success_NotFound) {
3813             result->setFailure();
3814             return true;
3815         }
3816 
3817         if (!res->updateFromMatchPairs(cx, str, matches))
3818             return false;
3819 
3820         JSSubString sep;
3821         res->getLastMatch(&sep);
3822 
3823         result->setResult(sep.length, matches[0].limit);
3824         return true;
3825     }
3826 };
3827 
3828 class SplitStringMatcher
3829 {
3830     RootedLinearString sep;
3831 
3832   public:
SplitStringMatcher(JSContext * cx,HandleLinearString sep)3833     SplitStringMatcher(JSContext* cx, HandleLinearString sep)
3834       : sep(cx, sep)
3835     {}
3836 
3837     static const bool returnsCaptures = false;
3838 
operator ()(JSContext * cx,JSLinearString * str,size_t index,SplitMatchResult * res) const3839     bool operator()(JSContext* cx, JSLinearString* str, size_t index, SplitMatchResult* res) const
3840     {
3841         MOZ_ASSERT(index == 0 || index < str->length());
3842         int match = StringMatch(str, sep, index);
3843         if (match == -1)
3844             res->setFailure();
3845         else
3846             res->setResult(sep->length(), match + sep->length());
3847         return true;
3848     }
3849 };
3850 
3851 } /* anonymous namespace */
3852 
3853 /* ES5 15.5.4.14 */
3854 bool
str_split(JSContext * cx,unsigned argc,Value * vp)3855 js::str_split(JSContext* cx, unsigned argc, Value* vp)
3856 {
3857     CallArgs args = CallArgsFromVp(argc, vp);
3858 
3859     /* Steps 1-2. */
3860     RootedString str(cx, ThisToStringForStringProto(cx, args));
3861     if (!str)
3862         return false;
3863 
3864     RootedObjectGroup group(cx, ObjectGroup::callingAllocationSiteGroup(cx, JSProto_Array));
3865     if (!group)
3866         return false;
3867 
3868     /* Step 5: Use the second argument as the split limit, if given. */
3869     uint32_t limit;
3870     if (args.hasDefined(1)) {
3871         double d;
3872         if (!ToNumber(cx, args[1], &d))
3873             return false;
3874         limit = ToUint32(d);
3875     } else {
3876         limit = UINT32_MAX;
3877     }
3878 
3879     /* Step 8. */
3880     RegExpGuard re(cx);
3881     RootedLinearString sepstr(cx);
3882     bool sepDefined = args.hasDefined(0);
3883     if (sepDefined) {
3884         ESClassValue cls;
3885         if (!GetClassOfValue(cx, args[0], &cls))
3886             return false;
3887 
3888         if (cls == ESClass_RegExp) {
3889             RootedObject obj(cx, &args[0].toObject());
3890             if (!RegExpToShared(cx, obj, &re))
3891                 return false;
3892         } else {
3893             sepstr = ArgToRootedString(cx, args, 0);
3894             if (!sepstr)
3895                 return false;
3896         }
3897     }
3898 
3899     /* Step 9. */
3900     if (limit == 0) {
3901         JSObject* aobj = NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3902         if (!aobj)
3903             return false;
3904         args.rval().setObject(*aobj);
3905         return true;
3906     }
3907 
3908     /* Step 10. */
3909     if (!sepDefined) {
3910         RootedValue v(cx, StringValue(str));
3911         JSObject* aobj = NewCopiedArrayTryUseGroup(cx, group, v.address(), 1);
3912         if (!aobj)
3913             return false;
3914         args.rval().setObject(*aobj);
3915         return true;
3916     }
3917     RootedLinearString linearStr(cx, str->ensureLinear(cx));
3918     if (!linearStr)
3919         return false;
3920 
3921     /* Steps 11-15. */
3922     RootedObject aobj(cx);
3923     if (!re.initialized()) {
3924         if (sepstr->length() == 0) {
3925             aobj = CharSplitHelper(cx, linearStr, limit, group);
3926         } else {
3927             SplitStringMatcher matcher(cx, sepstr);
3928             aobj = SplitHelper(cx, linearStr, limit, matcher, group);
3929         }
3930     } else {
3931         RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3932         if (!res)
3933             return false;
3934         SplitRegExpMatcher matcher(*re, res);
3935         aobj = SplitHelper(cx, linearStr, limit, matcher, group);
3936     }
3937     if (!aobj)
3938         return false;
3939 
3940     /* Step 16. */
3941     MOZ_ASSERT(aobj->group() == group);
3942     args.rval().setObject(*aobj);
3943     return true;
3944 }
3945 
3946 JSObject*
str_split_string(JSContext * cx,HandleObjectGroup group,HandleString str,HandleString sep)3947 js::str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep)
3948 {
3949     RootedLinearString linearStr(cx, str->ensureLinear(cx));
3950     if (!linearStr)
3951         return nullptr;
3952 
3953     RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3954     if (!linearSep)
3955         return nullptr;
3956 
3957     uint32_t limit = UINT32_MAX;
3958 
3959     if (linearSep->length() == 0)
3960         return CharSplitHelper(cx, linearStr, limit, group);
3961 
3962     SplitStringMatcher matcher(cx, linearSep);
3963     return SplitHelper(cx, linearStr, limit, matcher, group);
3964 }
3965 
3966 /*
3967  * Python-esque sequence operations.
3968  */
3969 static bool
str_concat(JSContext * cx,unsigned argc,Value * vp)3970 str_concat(JSContext* cx, unsigned argc, Value* vp)
3971 {
3972     CallArgs args = CallArgsFromVp(argc, vp);
3973     JSString* str = ThisToStringForStringProto(cx, args);
3974     if (!str)
3975         return false;
3976 
3977     for (unsigned i = 0; i < args.length(); i++) {
3978         JSString* argStr = ToString<NoGC>(cx, args[i]);
3979         if (!argStr) {
3980             RootedString strRoot(cx, str);
3981             argStr = ToString<CanGC>(cx, args[i]);
3982             if (!argStr)
3983                 return false;
3984             str = strRoot;
3985         }
3986 
3987         JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
3988         if (next) {
3989             str = next;
3990         } else {
3991             RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3992             str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3993             if (!str)
3994                 return false;
3995         }
3996     }
3997 
3998     args.rval().setString(str);
3999     return true;
4000 }
4001 
4002 static const JSFunctionSpec string_methods[] = {
4003 #if JS_HAS_TOSOURCE
4004     JS_FN(js_toSource_str,     str_toSource,          0,0),
4005 #endif
4006 
4007     /* Java-like methods. */
4008     JS_FN(js_toString_str,     str_toString,          0,0),
4009     JS_FN(js_valueOf_str,      str_toString,          0,0),
4010     JS_FN("toLowerCase",       str_toLowerCase,       0,JSFUN_GENERIC_NATIVE),
4011     JS_FN("toUpperCase",       str_toUpperCase,       0,JSFUN_GENERIC_NATIVE),
4012     JS_INLINABLE_FN("charAt",  str_charAt,            1,JSFUN_GENERIC_NATIVE, StringCharAt),
4013     JS_INLINABLE_FN("charCodeAt", str_charCodeAt,     1,JSFUN_GENERIC_NATIVE, StringCharCodeAt),
4014     JS_SELF_HOSTED_FN("substring", "String_substring", 2,0),
4015     JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
4016     JS_FN("includes",          str_includes,          1,JSFUN_GENERIC_NATIVE),
4017     JS_FN("contains",          str_contains,          1,JSFUN_GENERIC_NATIVE),
4018     JS_FN("indexOf",           str_indexOf,           1,JSFUN_GENERIC_NATIVE),
4019     JS_FN("lastIndexOf",       str_lastIndexOf,       1,JSFUN_GENERIC_NATIVE),
4020     JS_FN("startsWith",        str_startsWith,        1,JSFUN_GENERIC_NATIVE),
4021     JS_FN("endsWith",          str_endsWith,          1,JSFUN_GENERIC_NATIVE),
4022     JS_FN("trim",              str_trim,              0,JSFUN_GENERIC_NATIVE),
4023     JS_FN("trimLeft",          str_trimLeft,          0,JSFUN_GENERIC_NATIVE),
4024     JS_FN("trimRight",         str_trimRight,         0,JSFUN_GENERIC_NATIVE),
4025     JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
4026     JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
4027 #if EXPOSE_INTL_API
4028     JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
4029 #else
4030     JS_FN("localeCompare",     str_localeCompare,     1,JSFUN_GENERIC_NATIVE),
4031 #endif
4032     JS_SELF_HOSTED_FN("repeat", "String_repeat",      1,0),
4033 #if EXPOSE_INTL_API
4034     JS_FN("normalize",         str_normalize,         0,JSFUN_GENERIC_NATIVE),
4035 #endif
4036 
4037     /* Perl-ish methods (search is actually Python-esque). */
4038     JS_FN("match",             str_match,             1,JSFUN_GENERIC_NATIVE),
4039     JS_FN("search",            str_search,            1,JSFUN_GENERIC_NATIVE),
4040     JS_INLINABLE_FN("replace", str_replace,           2,JSFUN_GENERIC_NATIVE, StringReplace),
4041     JS_INLINABLE_FN("split",   str_split,             2,JSFUN_GENERIC_NATIVE, StringSplit),
4042     JS_SELF_HOSTED_FN("substr", "String_substr",      2,0),
4043 
4044     /* Python-esque sequence methods. */
4045     JS_FN("concat",            str_concat,            1,JSFUN_GENERIC_NATIVE),
4046     JS_SELF_HOSTED_FN("slice", "String_slice",        2,0),
4047 
4048     /* HTML string methods. */
4049     JS_SELF_HOSTED_FN("bold",     "String_bold",       0,0),
4050     JS_SELF_HOSTED_FN("italics",  "String_italics",    0,0),
4051     JS_SELF_HOSTED_FN("fixed",    "String_fixed",      0,0),
4052     JS_SELF_HOSTED_FN("strike",   "String_strike",     0,0),
4053     JS_SELF_HOSTED_FN("small",    "String_small",      0,0),
4054     JS_SELF_HOSTED_FN("big",      "String_big",        0,0),
4055     JS_SELF_HOSTED_FN("blink",    "String_blink",      0,0),
4056     JS_SELF_HOSTED_FN("sup",      "String_sup",        0,0),
4057     JS_SELF_HOSTED_FN("sub",      "String_sub",        0,0),
4058     JS_SELF_HOSTED_FN("anchor",   "String_anchor",     1,0),
4059     JS_SELF_HOSTED_FN("link",     "String_link",       1,0),
4060     JS_SELF_HOSTED_FN("fontcolor","String_fontcolor",  1,0),
4061     JS_SELF_HOSTED_FN("fontsize", "String_fontsize",   1,0),
4062 
4063     JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0,0),
4064     JS_FS_END
4065 };
4066 
4067 // ES6 rev 27 (2014 Aug 24) 21.1.1
4068 bool
StringConstructor(JSContext * cx,unsigned argc,Value * vp)4069 js::StringConstructor(JSContext* cx, unsigned argc, Value* vp)
4070 {
4071     CallArgs args = CallArgsFromVp(argc, vp);
4072 
4073     RootedString str(cx);
4074     if (args.length() > 0) {
4075         if (!args.isConstructing() && args[0].isSymbol())
4076             return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
4077 
4078         str = ToString<CanGC>(cx, args[0]);
4079         if (!str)
4080             return false;
4081     } else {
4082         str = cx->runtime()->emptyString;
4083     }
4084 
4085     if (args.isConstructing()) {
4086         RootedObject proto(cx);
4087         RootedObject newTarget(cx, &args.newTarget().toObject());
4088         if (!GetPrototypeFromConstructor(cx, newTarget, &proto))
4089             return false;
4090 
4091         StringObject* strobj = StringObject::create(cx, str, proto);
4092         if (!strobj)
4093             return false;
4094         args.rval().setObject(*strobj);
4095         return true;
4096     }
4097 
4098     args.rval().setString(str);
4099     return true;
4100 }
4101 
4102 static bool
str_fromCharCode_few_args(JSContext * cx,const CallArgs & args)4103 str_fromCharCode_few_args(JSContext* cx, const CallArgs& args)
4104 {
4105     MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE);
4106 
4107     char16_t chars[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
4108     for (unsigned i = 0; i < args.length(); i++) {
4109         uint16_t code;
4110         if (!ToUint16(cx, args[i], &code))
4111             return false;
4112         chars[i] = char16_t(code);
4113     }
4114     JSString* str = NewStringCopyN<CanGC>(cx, chars, args.length());
4115     if (!str)
4116         return false;
4117     args.rval().setString(str);
4118     return true;
4119 }
4120 
4121 bool
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)4122 js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
4123 {
4124     CallArgs args = CallArgsFromVp(argc, vp);
4125 
4126     MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
4127 
4128     // Optimize the single-char case.
4129     if (args.length() == 1)
4130         return str_fromCharCode_one_arg(cx, args[0], args.rval());
4131 
4132     // Optimize the case where the result will definitely fit in an inline
4133     // string (thin or fat) and so we don't need to malloc the chars. (We could
4134     // cover some cases where args.length() goes up to
4135     // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
4136     // all Latin1, but it doesn't seem worth the effort.)
4137     if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE)
4138         return str_fromCharCode_few_args(cx, args);
4139 
4140     char16_t* chars = cx->pod_malloc<char16_t>(args.length() + 1);
4141     if (!chars)
4142         return false;
4143     for (unsigned i = 0; i < args.length(); i++) {
4144         uint16_t code;
4145         if (!ToUint16(cx, args[i], &code)) {
4146             js_free(chars);
4147             return false;
4148         }
4149         chars[i] = char16_t(code);
4150     }
4151     chars[args.length()] = 0;
4152     JSString* str = NewString<CanGC>(cx, chars, args.length());
4153     if (!str) {
4154         js_free(chars);
4155         return false;
4156     }
4157 
4158     args.rval().setString(str);
4159     return true;
4160 }
4161 
4162 bool
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)4163 js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval)
4164 {
4165     uint16_t ucode;
4166 
4167     if (!ToUint16(cx, code, &ucode))
4168         return false;
4169 
4170     if (StaticStrings::hasUnit(ucode)) {
4171         rval.setString(cx->staticStrings().getUnit(ucode));
4172         return true;
4173     }
4174 
4175     char16_t c = char16_t(ucode);
4176     JSString* str = NewStringCopyN<CanGC>(cx, &c, 1);
4177     if (!str)
4178         return false;
4179 
4180     rval.setString(str);
4181     return true;
4182 }
4183 
4184 static const JSFunctionSpec string_static_methods[] = {
4185     JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0, StringFromCharCode),
4186 
4187     JS_SELF_HOSTED_FN("fromCodePoint",   "String_static_fromCodePoint", 1,0),
4188     JS_SELF_HOSTED_FN("raw",             "String_static_raw",           2,0),
4189     JS_SELF_HOSTED_FN("substring",       "String_static_substring",     3,0),
4190     JS_SELF_HOSTED_FN("substr",          "String_static_substr",        3,0),
4191     JS_SELF_HOSTED_FN("slice",           "String_static_slice",         3,0),
4192 
4193     // This must be at the end because of bug 853075: functions listed after
4194     // self-hosted methods aren't available in self-hosted code.
4195 #if EXPOSE_INTL_API
4196     JS_SELF_HOSTED_FN("localeCompare",   "String_static_localeCompare", 2,0),
4197 #endif
4198     JS_FS_END
4199 };
4200 
4201 /* static */ Shape*
assignInitialShape(ExclusiveContext * cx,Handle<StringObject * > obj)4202 StringObject::assignInitialShape(ExclusiveContext* cx, Handle<StringObject*> obj)
4203 {
4204     MOZ_ASSERT(obj->empty());
4205 
4206     return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT,
4207                                 JSPROP_PERMANENT | JSPROP_READONLY);
4208 }
4209 
4210 JSObject*
InitStringClass(JSContext * cx,HandleObject obj)4211 js::InitStringClass(JSContext* cx, HandleObject obj)
4212 {
4213     MOZ_ASSERT(obj->isNative());
4214 
4215     Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>());
4216 
4217     Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
4218     RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_));
4219     if (!proto || !proto->as<StringObject>().init(cx, empty))
4220         return nullptr;
4221 
4222     /* Now create the String function. */
4223     RootedFunction ctor(cx);
4224     ctor = global->createConstructor(cx, StringConstructor, cx->names().String, 1,
4225                                      AllocKind::FUNCTION, &jit::JitInfo_String);
4226     if (!ctor)
4227         return nullptr;
4228 
4229     if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto))
4230         return nullptr;
4231 
4232     if (!LinkConstructorAndPrototype(cx, ctor, proto))
4233         return nullptr;
4234 
4235     if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
4236         !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods))
4237     {
4238         return nullptr;
4239     }
4240 
4241     /*
4242      * Define escape/unescape, the URI encode/decode functions, and maybe
4243      * uneval on the global object.
4244      */
4245     if (!JS_DefineFunctions(cx, global, string_functions))
4246         return nullptr;
4247 
4248     return proto;
4249 }
4250 
4251 const char*
ValueToPrintable(JSContext * cx,const Value & vArg,JSAutoByteString * bytes,bool asSource)4252 js::ValueToPrintable(JSContext* cx, const Value& vArg, JSAutoByteString* bytes, bool asSource)
4253 {
4254     RootedValue v(cx, vArg);
4255     JSString* str;
4256     if (asSource)
4257         str = ValueToSource(cx, v);
4258     else
4259         str = ToString<CanGC>(cx, v);
4260     if (!str)
4261         return nullptr;
4262     str = QuoteString(cx, str, 0);
4263     if (!str)
4264         return nullptr;
4265     return bytes->encodeLatin1(cx, str);
4266 }
4267 
4268 template <AllowGC allowGC>
4269 JSString*
ToStringSlow(ExclusiveContext * cx,typename MaybeRooted<Value,allowGC>::HandleType arg)4270 js::ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg)
4271 {
4272     /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4273     MOZ_ASSERT(!arg.isString());
4274 
4275     Value v = arg;
4276     if (!v.isPrimitive()) {
4277         if (!cx->shouldBeJSContext() || !allowGC)
4278             return nullptr;
4279         RootedValue v2(cx, v);
4280         if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2))
4281             return nullptr;
4282         v = v2;
4283     }
4284 
4285     JSString* str;
4286     if (v.isString()) {
4287         str = v.toString();
4288     } else if (v.isInt32()) {
4289         str = Int32ToString<allowGC>(cx, v.toInt32());
4290     } else if (v.isDouble()) {
4291         str = NumberToString<allowGC>(cx, v.toDouble());
4292     } else if (v.isBoolean()) {
4293         str = BooleanToString(cx, v.toBoolean());
4294     } else if (v.isNull()) {
4295         str = cx->names().null;
4296     } else if (v.isSymbol()) {
4297         if (cx->shouldBeJSContext() && allowGC) {
4298             JS_ReportErrorNumber(cx->asJSContext(), GetErrorMessage, nullptr,
4299                                  JSMSG_SYMBOL_TO_STRING);
4300         }
4301         return nullptr;
4302     } else {
4303         MOZ_ASSERT(v.isUndefined());
4304         str = cx->names().undefined;
4305     }
4306     return str;
4307 }
4308 
4309 template JSString*
4310 js::ToStringSlow<CanGC>(ExclusiveContext* cx, HandleValue arg);
4311 
4312 template JSString*
4313 js::ToStringSlow<NoGC>(ExclusiveContext* cx, Value arg);
4314 
JS_PUBLIC_API(JSString *)4315 JS_PUBLIC_API(JSString*)
4316 js::ToStringSlow(JSContext* cx, HandleValue v)
4317 {
4318     return ToStringSlow<CanGC>(cx, v);
4319 }
4320 
4321 static JSString*
SymbolToSource(JSContext * cx,Symbol * symbol)4322 SymbolToSource(JSContext* cx, Symbol* symbol)
4323 {
4324     RootedString desc(cx, symbol->description());
4325     SymbolCode code = symbol->code();
4326     if (code != SymbolCode::InSymbolRegistry && code != SymbolCode::UniqueSymbol) {
4327         // Well-known symbol.
4328         MOZ_ASSERT(uint32_t(code) < JS::WellKnownSymbolLimit);
4329         return desc;
4330     }
4331 
4332     StringBuffer buf(cx);
4333     if (code == SymbolCode::InSymbolRegistry ? !buf.append("Symbol.for(") : !buf.append("Symbol("))
4334         return nullptr;
4335     if (desc) {
4336         desc = StringToSource(cx, desc);
4337         if (!desc || !buf.append(desc))
4338             return nullptr;
4339     }
4340     if (!buf.append(')'))
4341         return nullptr;
4342     return buf.finishString();
4343 }
4344 
4345 JSString*
ValueToSource(JSContext * cx,HandleValue v)4346 js::ValueToSource(JSContext* cx, HandleValue v)
4347 {
4348     JS_CHECK_RECURSION(cx, return nullptr);
4349     assertSameCompartment(cx, v);
4350 
4351     if (v.isUndefined())
4352         return cx->names().void0;
4353     if (v.isString())
4354         return StringToSource(cx, v.toString());
4355     if (v.isSymbol())
4356         return SymbolToSource(cx, v.toSymbol());
4357     if (v.isPrimitive()) {
4358         /* Special case to preserve negative zero, _contra_ toString. */
4359         if (v.isDouble() && IsNegativeZero(v.toDouble())) {
4360             /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4361             static const char16_t js_negzero_ucNstr[] = {'-', '0'};
4362 
4363             return NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2);
4364         }
4365         return ToString<CanGC>(cx, v);
4366     }
4367 
4368     RootedValue fval(cx);
4369     RootedObject obj(cx, &v.toObject());
4370     if (!GetProperty(cx, obj, obj, cx->names().toSource, &fval))
4371         return nullptr;
4372     if (IsCallable(fval)) {
4373         RootedValue rval(cx);
4374         if (!Invoke(cx, ObjectValue(*obj), fval, 0, nullptr, &rval))
4375             return nullptr;
4376         return ToString<CanGC>(cx, rval);
4377     }
4378 
4379     return ObjectToSource(cx, obj);
4380 }
4381 
4382 JSString*
StringToSource(JSContext * cx,JSString * str)4383 js::StringToSource(JSContext* cx, JSString* str)
4384 {
4385     return QuoteString(cx, str, '"');
4386 }
4387 
4388 bool
EqualChars(JSLinearString * str1,JSLinearString * str2)4389 js::EqualChars(JSLinearString* str1, JSLinearString* str2)
4390 {
4391     MOZ_ASSERT(str1->length() == str2->length());
4392 
4393     size_t len = str1->length();
4394 
4395     AutoCheckCannotGC nogc;
4396     if (str1->hasTwoByteChars()) {
4397         if (str2->hasTwoByteChars())
4398             return PodEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len);
4399 
4400         return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
4401     }
4402 
4403     if (str2->hasLatin1Chars())
4404         return PodEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
4405 
4406     return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
4407 }
4408 
4409 bool
EqualStrings(JSContext * cx,JSString * str1,JSString * str2,bool * result)4410 js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result)
4411 {
4412     if (str1 == str2) {
4413         *result = true;
4414         return true;
4415     }
4416 
4417     size_t length1 = str1->length();
4418     if (length1 != str2->length()) {
4419         *result = false;
4420         return true;
4421     }
4422 
4423     JSLinearString* linear1 = str1->ensureLinear(cx);
4424     if (!linear1)
4425         return false;
4426     JSLinearString* linear2 = str2->ensureLinear(cx);
4427     if (!linear2)
4428         return false;
4429 
4430     *result = EqualChars(linear1, linear2);
4431     return true;
4432 }
4433 
4434 bool
EqualStrings(JSLinearString * str1,JSLinearString * str2)4435 js::EqualStrings(JSLinearString* str1, JSLinearString* str2)
4436 {
4437     if (str1 == str2)
4438         return true;
4439 
4440     size_t length1 = str1->length();
4441     if (length1 != str2->length())
4442         return false;
4443 
4444     return EqualChars(str1, str2);
4445 }
4446 
4447 static int32_t
CompareStringsImpl(JSLinearString * str1,JSLinearString * str2)4448 CompareStringsImpl(JSLinearString* str1, JSLinearString* str2)
4449 {
4450     size_t len1 = str1->length();
4451     size_t len2 = str2->length();
4452 
4453     AutoCheckCannotGC nogc;
4454     if (str1->hasLatin1Chars()) {
4455         const Latin1Char* chars1 = str1->latin1Chars(nogc);
4456         return str2->hasLatin1Chars()
4457                ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4458                : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4459     }
4460 
4461     const char16_t* chars1 = str1->twoByteChars(nogc);
4462     return str2->hasLatin1Chars()
4463            ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4464            : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4465 }
4466 
4467 int32_t
CompareChars(const char16_t * s1,size_t len1,JSLinearString * s2)4468 js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2)
4469 {
4470     AutoCheckCannotGC nogc;
4471     return s2->hasLatin1Chars()
4472            ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
4473            : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
4474 }
4475 
4476 bool
CompareStrings(JSContext * cx,JSString * str1,JSString * str2,int32_t * result)4477 js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result)
4478 {
4479     MOZ_ASSERT(str1);
4480     MOZ_ASSERT(str2);
4481 
4482     if (str1 == str2) {
4483         *result = 0;
4484         return true;
4485     }
4486 
4487     JSLinearString* linear1 = str1->ensureLinear(cx);
4488     if (!linear1)
4489         return false;
4490 
4491     JSLinearString* linear2 = str2->ensureLinear(cx);
4492     if (!linear2)
4493         return false;
4494 
4495     *result = CompareStringsImpl(linear1, linear2);
4496     return true;
4497 }
4498 
4499 int32_t
CompareAtoms(JSAtom * atom1,JSAtom * atom2)4500 js::CompareAtoms(JSAtom* atom1, JSAtom* atom2)
4501 {
4502     return CompareStringsImpl(atom1, atom2);
4503 }
4504 
4505 bool
StringEqualsAscii(JSLinearString * str,const char * asciiBytes)4506 js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes)
4507 {
4508     size_t length = strlen(asciiBytes);
4509 #ifdef DEBUG
4510     for (size_t i = 0; i != length; ++i)
4511         MOZ_ASSERT(unsigned(asciiBytes[i]) <= 127);
4512 #endif
4513     if (length != str->length())
4514         return false;
4515 
4516     const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
4517 
4518     AutoCheckCannotGC nogc;
4519     return str->hasLatin1Chars()
4520            ? PodEqual(latin1, str->latin1Chars(nogc), length)
4521            : EqualChars(latin1, str->twoByteChars(nogc), length);
4522 }
4523 
4524 size_t
js_strlen(const char16_t * s)4525 js_strlen(const char16_t* s)
4526 {
4527     const char16_t* t;
4528 
4529     for (t = s; *t != 0; t++)
4530         continue;
4531     return (size_t)(t - s);
4532 }
4533 
4534 int32_t
js_strcmp(const char16_t * lhs,const char16_t * rhs)4535 js_strcmp(const char16_t* lhs, const char16_t* rhs)
4536 {
4537     while (true) {
4538         if (*lhs != *rhs)
4539             return int32_t(*lhs) - int32_t(*rhs);
4540         if (*lhs == 0)
4541             return 0;
4542         ++lhs, ++rhs;
4543     }
4544 }
4545 
4546 UniquePtr<char[], JS::FreePolicy>
DuplicateString(js::ExclusiveContext * cx,const char * s)4547 js::DuplicateString(js::ExclusiveContext* cx, const char* s)
4548 {
4549     size_t n = strlen(s) + 1;
4550     auto ret = cx->make_pod_array<char>(n);
4551     if (!ret)
4552         return ret;
4553     PodCopy(ret.get(), s, n);
4554     return ret;
4555 }
4556 
4557 UniquePtr<char16_t[], JS::FreePolicy>
DuplicateString(js::ExclusiveContext * cx,const char16_t * s)4558 js::DuplicateString(js::ExclusiveContext* cx, const char16_t* s)
4559 {
4560     size_t n = js_strlen(s) + 1;
4561     auto ret = cx->make_pod_array<char16_t>(n);
4562     if (!ret)
4563         return ret;
4564     PodCopy(ret.get(), s, n);
4565     return ret;
4566 }
4567 
4568 UniquePtr<char16_t[], JS::FreePolicy>
DuplicateString(const char16_t * s)4569 js::DuplicateString(const char16_t* s)
4570 {
4571     size_t n = js_strlen(s) + 1;
4572     UniquePtr<char16_t[], JS::FreePolicy> ret(js_pod_malloc<char16_t>(n));
4573     if (!ret)
4574         return nullptr;
4575     PodCopy(ret.get(), s, n);
4576     return ret;
4577 }
4578 
4579 template <typename CharT>
4580 const CharT*
js_strchr_limit(const CharT * s,char16_t c,const CharT * limit)4581 js_strchr_limit(const CharT* s, char16_t c, const CharT* limit)
4582 {
4583     while (s < limit) {
4584         if (*s == c)
4585             return s;
4586         s++;
4587     }
4588     return nullptr;
4589 }
4590 
4591 template const Latin1Char*
4592 js_strchr_limit(const Latin1Char* s, char16_t c, const Latin1Char* limit);
4593 
4594 template const char16_t*
4595 js_strchr_limit(const char16_t* s, char16_t c, const char16_t* limit);
4596 
4597 char16_t*
InflateString(ExclusiveContext * cx,const char * bytes,size_t * lengthp)4598 js::InflateString(ExclusiveContext* cx, const char* bytes, size_t* lengthp)
4599 {
4600     size_t nchars;
4601     char16_t* chars;
4602     size_t nbytes = *lengthp;
4603 
4604     nchars = nbytes;
4605     chars = cx->pod_malloc<char16_t>(nchars + 1);
4606     if (!chars)
4607         goto bad;
4608     for (size_t i = 0; i < nchars; i++)
4609         chars[i] = (unsigned char) bytes[i];
4610     *lengthp = nchars;
4611     chars[nchars] = 0;
4612     return chars;
4613 
4614   bad:
4615     // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4616     // on errors.
4617     *lengthp = 0;
4618     return nullptr;
4619 }
4620 
4621 template <typename CharT>
4622 bool
DeflateStringToBuffer(JSContext * maybecx,const CharT * src,size_t srclen,char * dst,size_t * dstlenp)4623 js::DeflateStringToBuffer(JSContext* maybecx, const CharT* src, size_t srclen,
4624                           char* dst, size_t* dstlenp)
4625 {
4626     size_t dstlen = *dstlenp;
4627     if (srclen > dstlen) {
4628         for (size_t i = 0; i < dstlen; i++)
4629             dst[i] = char(src[i]);
4630         if (maybecx) {
4631             AutoSuppressGC suppress(maybecx);
4632             JS_ReportErrorNumber(maybecx, GetErrorMessage, nullptr,
4633                                  JSMSG_BUFFER_TOO_SMALL);
4634         }
4635         return false;
4636     }
4637     for (size_t i = 0; i < srclen; i++)
4638         dst[i] = char(src[i]);
4639     *dstlenp = srclen;
4640     return true;
4641 }
4642 
4643 template bool
4644 js::DeflateStringToBuffer(JSContext* maybecx, const Latin1Char* src, size_t srclen,
4645                           char* dst, size_t* dstlenp);
4646 
4647 template bool
4648 js::DeflateStringToBuffer(JSContext* maybecx, const char16_t* src, size_t srclen,
4649                           char* dst, size_t* dstlenp);
4650 
4651 #define ____ false
4652 
4653 /*
4654  * Identifier start chars:
4655  * -      36:    $
4656  * -  65..90: A..Z
4657  * -      95:    _
4658  * - 97..122: a..z
4659  */
4660 const bool js_isidstart[] = {
4661 /*       0     1     2     3     4     5     6     7     8     9  */
4662 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4663 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4664 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4665 /*  3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4666 /*  4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4667 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4668 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4669 /*  7 */ true, true, true, true, true, true, true, true, true, true,
4670 /*  8 */ true, true, true, true, true, true, true, true, true, true,
4671 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4672 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4673 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4674 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4675 };
4676 
4677 /*
4678  * Identifier chars:
4679  * -      36:    $
4680  * -  48..57: 0..9
4681  * -  65..90: A..Z
4682  * -      95:    _
4683  * - 97..122: a..z
4684  */
4685 const bool js_isident[] = {
4686 /*       0     1     2     3     4     5     6     7     8     9  */
4687 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4688 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4689 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4690 /*  3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4691 /*  4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4692 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
4693 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4694 /*  7 */ true, true, true, true, true, true, true, true, true, true,
4695 /*  8 */ true, true, true, true, true, true, true, true, true, true,
4696 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4697 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4698 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4699 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4700 };
4701 
4702 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4703 const bool js_isspace[] = {
4704 /*       0     1     2     3     4     5     6     7     8     9  */
4705 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
4706 /*  1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
4707 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4708 /*  3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
4709 /*  4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4710 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4711 /*  6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4712 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4713 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4714 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4715 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4716 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4717 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4718 };
4719 
4720 /*
4721  * Uri reserved chars + #:
4722  * - 35: #
4723  * - 36: $
4724  * - 38: &
4725  * - 43: +
4726  * - 44: ,
4727  * - 47: /
4728  * - 58: :
4729  * - 59: ;
4730  * - 61: =
4731  * - 63: ?
4732  * - 64: @
4733  */
4734 static const bool js_isUriReservedPlusPound[] = {
4735 /*       0     1     2     3     4     5     6     7     8     9  */
4736 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4737 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4738 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4739 /*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
4740 /*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
4741 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4742 /*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
4743 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4744 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4745 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4746 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4747 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4748 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4749 };
4750 
4751 /*
4752  * Uri unescaped chars:
4753  * -      33: !
4754  * -      39: '
4755  * -      40: (
4756  * -      41: )
4757  * -      42: *
4758  * -      45: -
4759  * -      46: .
4760  * -  48..57: 0-9
4761  * -  65..90: A-Z
4762  * -      95: _
4763  * - 97..122: a-z
4764  * -     126: ~
4765  */
4766 static const bool js_isUriUnescaped[] = {
4767 /*       0     1     2     3     4     5     6     7     8     9  */
4768 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4769 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4770 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4771 /*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
4772 /*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
4773 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
4774 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4775 /*  7 */ true, true, true, true, true, true, true, true, true, true,
4776 /*  8 */ true, true, true, true, true, true, true, true, true, true,
4777 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4778 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4779 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4780 /* 12 */ true, true, true, ____, ____, ____, true, ____
4781 };
4782 
4783 #undef ____
4784 
4785 #define URI_CHUNK 64U
4786 
4787 static inline bool
TransferBufferToString(StringBuffer & sb,MutableHandleValue rval)4788 TransferBufferToString(StringBuffer& sb, MutableHandleValue rval)
4789 {
4790     JSString* str = sb.finishString();
4791     if (!str)
4792         return false;
4793     rval.setString(str);
4794     return true;
4795 }
4796 
4797 /*
4798  * ECMA 3, 15.1.3 URI Handling Function Properties
4799  *
4800  * The following are implementations of the algorithms
4801  * given in the ECMA specification for the hidden functions
4802  * 'Encode' and 'Decode'.
4803  */
4804 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
4805 
4806 template <typename CharT>
4807 static EncodeResult
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet,const bool * unescapedSet2)4808 Encode(StringBuffer& sb, const CharT* chars, size_t length,
4809        const bool* unescapedSet, const bool* unescapedSet2)
4810 {
4811     static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4812 
4813     char16_t hexBuf[4];
4814     hexBuf[0] = '%';
4815     hexBuf[3] = 0;
4816 
4817     for (size_t k = 0; k < length; k++) {
4818         char16_t c = chars[k];
4819         if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
4820             if (!sb.append(c))
4821                 return Encode_Failure;
4822         } else {
4823             if (c >= 0xDC00 && c <= 0xDFFF)
4824                 return Encode_BadUri;
4825 
4826             uint32_t v;
4827             if (c < 0xD800 || c > 0xDBFF) {
4828                 v = c;
4829             } else {
4830                 k++;
4831                 if (k == length)
4832                     return Encode_BadUri;
4833 
4834                 char16_t c2 = chars[k];
4835                 if (c2 < 0xDC00 || c2 > 0xDFFF)
4836                     return Encode_BadUri;
4837 
4838                 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4839             }
4840             uint8_t utf8buf[4];
4841             size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4842             for (size_t j = 0; j < L; j++) {
4843                 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4844                 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4845                 if (!sb.append(hexBuf, 3))
4846                     return Encode_Failure;
4847             }
4848         }
4849     }
4850 
4851     return Encode_Success;
4852 }
4853 
4854 static bool
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,const bool * unescapedSet2,MutableHandleValue rval)4855 Encode(JSContext* cx, HandleLinearString str, const bool* unescapedSet,
4856        const bool* unescapedSet2, MutableHandleValue rval)
4857 {
4858     size_t length = str->length();
4859     if (length == 0) {
4860         rval.setString(cx->runtime()->emptyString);
4861         return true;
4862     }
4863 
4864     StringBuffer sb(cx);
4865     if (!sb.reserve(length))
4866         return false;
4867 
4868     EncodeResult res;
4869     if (str->hasLatin1Chars()) {
4870         AutoCheckCannotGC nogc;
4871         res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet, unescapedSet2);
4872     } else {
4873         AutoCheckCannotGC nogc;
4874         res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet, unescapedSet2);
4875     }
4876 
4877     if (res == Encode_Failure)
4878         return false;
4879 
4880     if (res == Encode_BadUri) {
4881         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI, nullptr);
4882         return false;
4883     }
4884 
4885     MOZ_ASSERT(res == Encode_Success);
4886     return TransferBufferToString(sb, rval);
4887 }
4888 
4889 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4890 
4891 template <typename CharT>
4892 static DecodeResult
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4893 Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reservedSet)
4894 {
4895     for (size_t k = 0; k < length; k++) {
4896         char16_t c = chars[k];
4897         if (c == '%') {
4898             size_t start = k;
4899             if ((k + 2) >= length)
4900                 return Decode_BadUri;
4901 
4902             if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4903                 return Decode_BadUri;
4904 
4905             uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4906             k += 2;
4907             if (!(B & 0x80)) {
4908                 c = char16_t(B);
4909             } else {
4910                 int n = 1;
4911                 while (B & (0x80 >> n))
4912                     n++;
4913 
4914                 if (n == 1 || n > 4)
4915                     return Decode_BadUri;
4916 
4917                 uint8_t octets[4];
4918                 octets[0] = (uint8_t)B;
4919                 if (k + 3 * (n - 1) >= length)
4920                     return Decode_BadUri;
4921 
4922                 for (int j = 1; j < n; j++) {
4923                     k++;
4924                     if (chars[k] != '%')
4925                         return Decode_BadUri;
4926 
4927                     if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4928                         return Decode_BadUri;
4929 
4930                     B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4931                     if ((B & 0xC0) != 0x80)
4932                         return Decode_BadUri;
4933 
4934                     k += 2;
4935                     octets[j] = char(B);
4936                 }
4937                 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4938                 if (v >= 0x10000) {
4939                     v -= 0x10000;
4940                     if (v > 0xFFFFF)
4941                         return Decode_BadUri;
4942 
4943                     c = char16_t((v & 0x3FF) + 0xDC00);
4944                     char16_t H = char16_t((v >> 10) + 0xD800);
4945                     if (!sb.append(H))
4946                         return Decode_Failure;
4947                 } else {
4948                     c = char16_t(v);
4949                 }
4950             }
4951             if (c < 128 && reservedSet && reservedSet[c]) {
4952                 if (!sb.append(chars + start, k - start + 1))
4953                     return Decode_Failure;
4954             } else {
4955                 if (!sb.append(c))
4956                     return Decode_Failure;
4957             }
4958         } else {
4959             if (!sb.append(c))
4960                 return Decode_Failure;
4961         }
4962     }
4963 
4964     return Decode_Success;
4965 }
4966 
4967 static bool
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4968 Decode(JSContext* cx, HandleLinearString str, const bool* reservedSet, MutableHandleValue rval)
4969 {
4970     size_t length = str->length();
4971     if (length == 0) {
4972         rval.setString(cx->runtime()->emptyString);
4973         return true;
4974     }
4975 
4976     StringBuffer sb(cx);
4977 
4978     DecodeResult res;
4979     if (str->hasLatin1Chars()) {
4980         AutoCheckCannotGC nogc;
4981         res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4982     } else {
4983         AutoCheckCannotGC nogc;
4984         res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4985     }
4986 
4987     if (res == Decode_Failure)
4988         return false;
4989 
4990     if (res == Decode_BadUri) {
4991         JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4992         return false;
4993     }
4994 
4995     MOZ_ASSERT(res == Decode_Success);
4996     return TransferBufferToString(sb, rval);
4997 }
4998 
4999 static bool
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)5000 str_decodeURI(JSContext* cx, unsigned argc, Value* vp)
5001 {
5002     CallArgs args = CallArgsFromVp(argc, vp);
5003     RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5004     if (!str)
5005         return false;
5006 
5007     return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
5008 }
5009 
5010 static bool
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)5011 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5012 {
5013     CallArgs args = CallArgsFromVp(argc, vp);
5014     RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5015     if (!str)
5016         return false;
5017 
5018     return Decode(cx, str, nullptr, args.rval());
5019 }
5020 
5021 static bool
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)5022 str_encodeURI(JSContext* cx, unsigned argc, Value* vp)
5023 {
5024     CallArgs args = CallArgsFromVp(argc, vp);
5025     RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5026     if (!str)
5027         return false;
5028 
5029     return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval());
5030 }
5031 
5032 static bool
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)5033 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5034 {
5035     CallArgs args = CallArgsFromVp(argc, vp);
5036     RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5037     if (!str)
5038         return false;
5039 
5040     return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval());
5041 }
5042 
5043 /*
5044  * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
5045  * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
5046  */
5047 uint32_t
OneUcs4ToUtf8Char(uint8_t * utf8Buffer,uint32_t ucs4Char)5048 js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
5049 {
5050     MOZ_ASSERT(ucs4Char <= 0x10FFFF);
5051 
5052     if (ucs4Char < 0x80) {
5053         utf8Buffer[0] = uint8_t(ucs4Char);
5054         return 1;
5055     }
5056 
5057     uint32_t a = ucs4Char >> 11;
5058     uint32_t utf8Length = 2;
5059     while (a) {
5060         a >>= 5;
5061         utf8Length++;
5062     }
5063 
5064     MOZ_ASSERT(utf8Length <= 4);
5065 
5066     uint32_t i = utf8Length;
5067     while (--i) {
5068         utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80);
5069         ucs4Char >>= 6;
5070     }
5071 
5072     utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char);
5073     return utf8Length;
5074 }
5075 
5076 size_t
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,JSLinearString * str,uint32_t quote)5077 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, JSLinearString* str,
5078                          uint32_t quote)
5079 {
5080     size_t len = str->length();
5081     AutoCheckCannotGC nogc;
5082     return str->hasLatin1Chars()
5083            ? PutEscapedStringImpl(buffer, bufferSize, out, str->latin1Chars(nogc), len, quote)
5084            : PutEscapedStringImpl(buffer, bufferSize, out, str->twoByteChars(nogc), len, quote);
5085 }
5086 
5087 template <typename CharT>
5088 size_t
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,const CharT * chars,size_t length,uint32_t quote)5089 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars,
5090                          size_t length, uint32_t quote)
5091 {
5092     enum {
5093         STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
5094     } state;
5095 
5096     MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
5097     MOZ_ASSERT_IF(!buffer, bufferSize == 0);
5098     MOZ_ASSERT_IF(out, !buffer);
5099 
5100     if (bufferSize == 0)
5101         buffer = nullptr;
5102     else
5103         bufferSize--;
5104 
5105     const CharT* charsEnd = chars + length;
5106     size_t n = 0;
5107     state = FIRST_QUOTE;
5108     unsigned shift = 0;
5109     unsigned hex = 0;
5110     unsigned u = 0;
5111     char c = 0;  /* to quell GCC warnings */
5112 
5113     for (;;) {
5114         switch (state) {
5115           case STOP:
5116             goto stop;
5117           case FIRST_QUOTE:
5118             state = CHARS;
5119             goto do_quote;
5120           case LAST_QUOTE:
5121             state = STOP;
5122           do_quote:
5123             if (quote == 0)
5124                 continue;
5125             c = (char)quote;
5126             break;
5127           case CHARS:
5128             if (chars == charsEnd) {
5129                 state = LAST_QUOTE;
5130                 continue;
5131             }
5132             u = *chars++;
5133             if (u < ' ') {
5134                 if (u != 0) {
5135                     const char* escape = strchr(js_EscapeMap, (int)u);
5136                     if (escape) {
5137                         u = escape[1];
5138                         goto do_escape;
5139                     }
5140                 }
5141                 goto do_hex_escape;
5142             }
5143             if (u < 127) {
5144                 if (u == quote || u == '\\')
5145                     goto do_escape;
5146                 c = (char)u;
5147             } else if (u < 0x100) {
5148                 goto do_hex_escape;
5149             } else {
5150                 shift = 16;
5151                 hex = u;
5152                 u = 'u';
5153                 goto do_escape;
5154             }
5155             break;
5156           do_hex_escape:
5157             shift = 8;
5158             hex = u;
5159             u = 'x';
5160           do_escape:
5161             c = '\\';
5162             state = ESCAPE_START;
5163             break;
5164           case ESCAPE_START:
5165             MOZ_ASSERT(' ' <= u && u < 127);
5166             c = (char)u;
5167             state = ESCAPE_MORE;
5168             break;
5169           case ESCAPE_MORE:
5170             if (shift == 0) {
5171                 state = CHARS;
5172                 continue;
5173             }
5174             shift -= 4;
5175             u = 0xF & (hex >> shift);
5176             c = (char)(u + (u < 10 ? '0' : 'A' - 10));
5177             break;
5178         }
5179         if (buffer) {
5180             MOZ_ASSERT(n <= bufferSize);
5181             if (n != bufferSize) {
5182                 buffer[n] = c;
5183             } else {
5184                 buffer[n] = '\0';
5185                 buffer = nullptr;
5186             }
5187         } else if (out) {
5188             if (out->put(&c, 1) < 0)
5189                 return size_t(-1);
5190         }
5191         n++;
5192     }
5193   stop:
5194     if (buffer)
5195         buffer[n] = '\0';
5196     return n;
5197 }
5198 
5199 template size_t
5200 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const Latin1Char* chars,
5201                          size_t length, uint32_t quote);
5202 
5203 template size_t
5204 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char* chars,
5205                          size_t length, uint32_t quote);
5206 
5207 template size_t
5208 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char16_t* chars,
5209                          size_t length, uint32_t quote);
5210 
5211 template size_t
5212 js::PutEscapedString(char* buffer, size_t bufferSize, const Latin1Char* chars, size_t length,
5213                      uint32_t quote);
5214 
5215 template size_t
5216 js::PutEscapedString(char* buffer, size_t bufferSize, const char16_t* chars, size_t length,
5217                      uint32_t quote);
5218