1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "jsstr.h"
8
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
17
18 #include <ctype.h>
19 #include <string.h>
20
21 #include "jsapi.h"
22 #include "jsarray.h"
23 #include "jsatom.h"
24 #include "jsbool.h"
25 #include "jscntxt.h"
26 #include "jsgc.h"
27 #include "jsnum.h"
28 #include "jsobj.h"
29 #include "jsopcode.h"
30 #include "jstypes.h"
31 #include "jsutil.h"
32
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
35 #include "jit/InlinableNatives.h"
36 #include "js/Conversions.h"
37 #if ENABLE_INTL_API
38 #include "unicode/unorm.h"
39 #endif
40 #include "vm/GlobalObject.h"
41 #include "vm/Interpreter.h"
42 #include "vm/Opcodes.h"
43 #include "vm/Printer.h"
44 #include "vm/RegExpObject.h"
45 #include "vm/RegExpStatics.h"
46 #include "vm/ScopeObject.h"
47 #include "vm/StringBuffer.h"
48
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
52 #include "vm/TypeInference-inl.h"
53
54 using namespace js;
55 using namespace js::gc;
56 using namespace js::unicode;
57
58 using JS::Symbol;
59 using JS::SymbolCode;
60 using JS::ToInt32;
61 using JS::ToUint32;
62
63 using mozilla::AssertedCast;
64 using mozilla::CheckedInt;
65 using mozilla::IsNaN;
66 using mozilla::IsNegativeZero;
67 using mozilla::IsSame;
68 using mozilla::Move;
69 using mozilla::PodCopy;
70 using mozilla::PodEqual;
71 using mozilla::RangedPtr;
72 using mozilla::UniquePtr;
73
74 using JS::AutoCheckCannotGC;
75
76 static JSLinearString*
ArgToRootedString(JSContext * cx,const CallArgs & args,unsigned argno)77 ArgToRootedString(JSContext* cx, const CallArgs& args, unsigned argno)
78 {
79 if (argno >= args.length())
80 return cx->names().undefined;
81
82 JSString* str = ToString<CanGC>(cx, args[argno]);
83 if (!str)
84 return nullptr;
85
86 args[argno].setString(str);
87 return str->ensureLinear(cx);
88 }
89
90 /*
91 * Forward declarations for URI encode/decode and helper routines
92 */
93 static bool
94 str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
95
96 static bool
97 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
98
99 static bool
100 str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
101
102 static bool
103 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
104
105 /*
106 * Global string methods
107 */
108
109
110 /* ES5 B.2.1 */
111 template <typename CharT>
112 static Latin1Char*
Escape(JSContext * cx,const CharT * chars,uint32_t length,uint32_t * newLengthOut)113 Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut)
114 {
115 static const uint8_t shouldPassThrough[128] = {
116 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
117 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
119 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
120 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
121 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
122 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
123 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
124 };
125
126 /* Take a first pass and see how big the result string will need to be. */
127 uint32_t newLength = length;
128 for (size_t i = 0; i < length; i++) {
129 char16_t ch = chars[i];
130 if (ch < 128 && shouldPassThrough[ch])
131 continue;
132
133 /* The character will be encoded as %XX or %uXXXX. */
134 newLength += (ch < 256) ? 2 : 5;
135
136 /*
137 * newlength is incremented by at most 5 on each iteration, so worst
138 * case newlength == length * 6. This can't overflow.
139 */
140 static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
141 "newlength must not overflow");
142 }
143
144 Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1);
145 if (!newChars)
146 return nullptr;
147
148 static const char digits[] = "0123456789ABCDEF";
149
150 size_t i, ni;
151 for (i = 0, ni = 0; i < length; i++) {
152 char16_t ch = chars[i];
153 if (ch < 128 && shouldPassThrough[ch]) {
154 newChars[ni++] = ch;
155 } else if (ch < 256) {
156 newChars[ni++] = '%';
157 newChars[ni++] = digits[ch >> 4];
158 newChars[ni++] = digits[ch & 0xF];
159 } else {
160 newChars[ni++] = '%';
161 newChars[ni++] = 'u';
162 newChars[ni++] = digits[ch >> 12];
163 newChars[ni++] = digits[(ch & 0xF00) >> 8];
164 newChars[ni++] = digits[(ch & 0xF0) >> 4];
165 newChars[ni++] = digits[ch & 0xF];
166 }
167 }
168 MOZ_ASSERT(ni == newLength);
169 newChars[newLength] = 0;
170
171 *newLengthOut = newLength;
172 return newChars;
173 }
174
175 static bool
str_escape(JSContext * cx,unsigned argc,Value * vp)176 str_escape(JSContext* cx, unsigned argc, Value* vp)
177 {
178 CallArgs args = CallArgsFromVp(argc, vp);
179
180 JSLinearString* str = ArgToRootedString(cx, args, 0);
181 if (!str)
182 return false;
183
184 ScopedJSFreePtr<Latin1Char> newChars;
185 uint32_t newLength = 0; // initialize to silence GCC warning
186 if (str->hasLatin1Chars()) {
187 AutoCheckCannotGC nogc;
188 newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength);
189 } else {
190 AutoCheckCannotGC nogc;
191 newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength);
192 }
193
194 if (!newChars)
195 return false;
196
197 JSString* res = NewString<CanGC>(cx, newChars.get(), newLength);
198 if (!res)
199 return false;
200
201 newChars.forget();
202 args.rval().setString(res);
203 return true;
204 }
205
206 template <typename CharT>
207 static inline bool
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)208 Unhex4(const RangedPtr<const CharT> chars, char16_t* result)
209 {
210 char16_t a = chars[0],
211 b = chars[1],
212 c = chars[2],
213 d = chars[3];
214
215 if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
216 return false;
217
218 *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
219 return true;
220 }
221
222 template <typename CharT>
223 static inline bool
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)224 Unhex2(const RangedPtr<const CharT> chars, char16_t* result)
225 {
226 char16_t a = chars[0],
227 b = chars[1];
228
229 if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
230 return false;
231
232 *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
233 return true;
234 }
235
236 template <typename CharT>
237 static bool
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)238 Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars)
239 {
240 /*
241 * NB: use signed integers for length/index to allow simple length
242 * comparisons without unsigned-underflow hazards.
243 */
244 static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer");
245 int length = AssertedCast<int>(chars.length());
246
247 /*
248 * Note that the spec algorithm has been optimized to avoid building
249 * a string in the case where no escapes are present.
250 */
251
252 /* Step 4. */
253 int k = 0;
254 bool building = false;
255
256 /* Step 5. */
257 while (k < length) {
258 /* Step 6. */
259 char16_t c = chars[k];
260
261 /* Step 7. */
262 if (c != '%')
263 goto step_18;
264
265 /* Step 8. */
266 if (k > length - 6)
267 goto step_14;
268
269 /* Step 9. */
270 if (chars[k + 1] != 'u')
271 goto step_14;
272
273 #define ENSURE_BUILDING \
274 do { \
275 if (!building) { \
276 building = true; \
277 if (!sb.reserve(length)) \
278 return false; \
279 sb.infallibleAppend(chars.start().get(), k); \
280 } \
281 } while(false);
282
283 /* Step 10-13. */
284 if (Unhex4(chars.start() + k + 2, &c)) {
285 ENSURE_BUILDING;
286 k += 5;
287 goto step_18;
288 }
289
290 step_14:
291 /* Step 14. */
292 if (k > length - 3)
293 goto step_18;
294
295 /* Step 15-17. */
296 if (Unhex2(chars.start() + k + 1, &c)) {
297 ENSURE_BUILDING;
298 k += 2;
299 }
300
301 step_18:
302 if (building && !sb.append(c))
303 return false;
304
305 /* Step 19. */
306 k += 1;
307 }
308
309 return true;
310 #undef ENSURE_BUILDING
311 }
312
313 /* ES5 B.2.2 */
314 static bool
str_unescape(JSContext * cx,unsigned argc,Value * vp)315 str_unescape(JSContext* cx, unsigned argc, Value* vp)
316 {
317 CallArgs args = CallArgsFromVp(argc, vp);
318
319 /* Step 1. */
320 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
321 if (!str)
322 return false;
323
324 /* Step 3. */
325 StringBuffer sb(cx);
326 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars())
327 return false;
328
329 if (str->hasLatin1Chars()) {
330 AutoCheckCannotGC nogc;
331 if (!Unescape(sb, str->latin1Range(nogc)))
332 return false;
333 } else {
334 AutoCheckCannotGC nogc;
335 if (!Unescape(sb, str->twoByteRange(nogc)))
336 return false;
337 }
338
339 JSLinearString* result;
340 if (!sb.empty()) {
341 result = sb.finishString();
342 if (!result)
343 return false;
344 } else {
345 result = str;
346 }
347
348 args.rval().setString(result);
349 return true;
350 }
351
352 #if JS_HAS_UNEVAL
353 static bool
str_uneval(JSContext * cx,unsigned argc,Value * vp)354 str_uneval(JSContext* cx, unsigned argc, Value* vp)
355 {
356 CallArgs args = CallArgsFromVp(argc, vp);
357 JSString* str = ValueToSource(cx, args.get(0));
358 if (!str)
359 return false;
360
361 args.rval().setString(str);
362 return true;
363 }
364 #endif
365
366 static const JSFunctionSpec string_functions[] = {
367 JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
368 JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
369 #if JS_HAS_UNEVAL
370 JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
371 #endif
372 JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
373 JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
374 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1, JSPROP_RESOLVING),
375 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1, JSPROP_RESOLVING),
376
377 JS_FS_END
378 };
379
380 static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
381
382 static bool
str_enumerate(JSContext * cx,HandleObject obj)383 str_enumerate(JSContext* cx, HandleObject obj)
384 {
385 RootedString str(cx, obj->as<StringObject>().unbox());
386 RootedValue value(cx);
387 for (size_t i = 0, length = str->length(); i < length; i++) {
388 JSString* str1 = NewDependentString(cx, str, i, 1);
389 if (!str1)
390 return false;
391 value.setString(str1);
392 if (!DefineElement(cx, obj, i, value, nullptr, nullptr,
393 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
394 {
395 return false;
396 }
397 }
398
399 return true;
400 }
401
402 static bool
str_mayResolve(const JSAtomState &,jsid id,JSObject *)403 str_mayResolve(const JSAtomState&, jsid id, JSObject*)
404 {
405 // str_resolve ignores non-integer ids.
406 return JSID_IS_INT(id);
407 }
408
409 static bool
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)410 str_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp)
411 {
412 if (!JSID_IS_INT(id))
413 return true;
414
415 RootedString str(cx, obj->as<StringObject>().unbox());
416
417 int32_t slot = JSID_TO_INT(id);
418 if ((size_t)slot < str->length()) {
419 JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
420 if (!str1)
421 return false;
422 RootedValue value(cx, StringValue(str1));
423 if (!DefineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
424 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
425 {
426 return false;
427 }
428 *resolvedp = true;
429 }
430 return true;
431 }
432
433 const Class StringObject::class_ = {
434 js_String_str,
435 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
436 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
437 nullptr, /* addProperty */
438 nullptr, /* delProperty */
439 nullptr, /* getProperty */
440 nullptr, /* setProperty */
441 str_enumerate,
442 str_resolve,
443 str_mayResolve
444 };
445
446 /*
447 * Returns a JSString * for the |this| value associated with 'call', or throws
448 * a TypeError if |this| is null or undefined. This algorithm is the same as
449 * calling CheckObjectCoercible(this), then returning ToString(this), as all
450 * String.prototype.* methods do (other than toString and valueOf).
451 */
452 static MOZ_ALWAYS_INLINE JSString*
ThisToStringForStringProto(JSContext * cx,CallReceiver call)453 ThisToStringForStringProto(JSContext* cx, CallReceiver call)
454 {
455 JS_CHECK_RECURSION(cx, return nullptr);
456
457 if (call.thisv().isString())
458 return call.thisv().toString();
459
460 if (call.thisv().isObject()) {
461 RootedObject obj(cx, &call.thisv().toObject());
462 if (obj->is<StringObject>()) {
463 StringObject* nobj = &obj->as<StringObject>();
464 Rooted<jsid> id(cx, NameToId(cx->names().toString));
465 if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, str_toString)) {
466 JSString* str = nobj->unbox();
467 call.setThis(StringValue(str));
468 return str;
469 }
470 }
471 } else if (call.thisv().isNullOrUndefined()) {
472 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
473 call.thisv().isNull() ? "null" : "undefined", "object");
474 return nullptr;
475 }
476
477 JSString* str = ToStringSlow<CanGC>(cx, call.thisv());
478 if (!str)
479 return nullptr;
480
481 call.setThis(StringValue(str));
482 return str;
483 }
484
485 MOZ_ALWAYS_INLINE bool
IsString(HandleValue v)486 IsString(HandleValue v)
487 {
488 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
489 }
490
491 #if JS_HAS_TOSOURCE
492
493 MOZ_ALWAYS_INLINE bool
str_toSource_impl(JSContext * cx,const CallArgs & args)494 str_toSource_impl(JSContext* cx, const CallArgs& args)
495 {
496 MOZ_ASSERT(IsString(args.thisv()));
497
498 Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
499 if (!str)
500 return false;
501
502 str = QuoteString(cx, str, '"');
503 if (!str)
504 return false;
505
506 StringBuffer sb(cx);
507 if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
508 return false;
509
510 str = sb.finishString();
511 if (!str)
512 return false;
513 args.rval().setString(str);
514 return true;
515 }
516
517 static bool
str_toSource(JSContext * cx,unsigned argc,Value * vp)518 str_toSource(JSContext* cx, unsigned argc, Value* vp)
519 {
520 CallArgs args = CallArgsFromVp(argc, vp);
521 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
522 }
523
524 #endif /* JS_HAS_TOSOURCE */
525
526 MOZ_ALWAYS_INLINE bool
str_toString_impl(JSContext * cx,const CallArgs & args)527 str_toString_impl(JSContext* cx, const CallArgs& args)
528 {
529 MOZ_ASSERT(IsString(args.thisv()));
530
531 args.rval().setString(args.thisv().isString()
532 ? args.thisv().toString()
533 : args.thisv().toObject().as<StringObject>().unbox());
534 return true;
535 }
536
537 bool
str_toString(JSContext * cx,unsigned argc,Value * vp)538 js::str_toString(JSContext* cx, unsigned argc, Value* vp)
539 {
540 CallArgs args = CallArgsFromVp(argc, vp);
541 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
542 }
543
544 /*
545 * Java-like string native methods.
546 */
547
548 JSString*
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)549 js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt)
550 {
551 MOZ_ASSERT(0 <= beginInt);
552 MOZ_ASSERT(0 <= lengthInt);
553 MOZ_ASSERT(uint32_t(beginInt) <= str->length());
554 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
555
556 uint32_t begin = beginInt;
557 uint32_t len = lengthInt;
558
559 /*
560 * Optimization for one level deep ropes.
561 * This is common for the following pattern:
562 *
563 * while() {
564 * text = text.substr(0, x) + "bla" + text.substr(x)
565 * test.charCodeAt(x + 1)
566 * }
567 */
568 if (str->isRope()) {
569 JSRope* rope = &str->asRope();
570
571 /* Substring is totally in leftChild of rope. */
572 if (begin + len <= rope->leftChild()->length())
573 return NewDependentString(cx, rope->leftChild(), begin, len);
574
575 /* Substring is totally in rightChild of rope. */
576 if (begin >= rope->leftChild()->length()) {
577 begin -= rope->leftChild()->length();
578 return NewDependentString(cx, rope->rightChild(), begin, len);
579 }
580
581 /*
582 * Requested substring is partly in the left and partly in right child.
583 * Create a rope of substrings for both childs.
584 */
585 MOZ_ASSERT(begin < rope->leftChild()->length() &&
586 begin + len > rope->leftChild()->length());
587
588 size_t lhsLength = rope->leftChild()->length() - begin;
589 size_t rhsLength = begin + len - rope->leftChild()->length();
590
591 Rooted<JSRope*> ropeRoot(cx, rope);
592 RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
593 if (!lhs)
594 return nullptr;
595
596 RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
597 if (!rhs)
598 return nullptr;
599
600 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
601 }
602
603 return NewDependentString(cx, str, begin, len);
604 }
605
606 template <typename CharT>
607 static JSString*
ToLowerCase(JSContext * cx,JSLinearString * str)608 ToLowerCase(JSContext* cx, JSLinearString* str)
609 {
610 // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
611 // is a Latin1 string, the output is also a Latin1 string.
612 UniquePtr<CharT[], JS::FreePolicy> newChars;
613 size_t length = str->length();
614 {
615 AutoCheckCannotGC nogc;
616 const CharT* chars = str->chars<CharT>(nogc);
617
618 // Look for the first upper case character.
619 size_t i = 0;
620 for (; i < length; i++) {
621 char16_t c = chars[i];
622 if (unicode::CanLowerCase(c))
623 break;
624 }
625
626 // If all characters are lower case, return the input string.
627 if (i == length)
628 return str;
629
630 newChars = cx->make_pod_array<CharT>(length + 1);
631 if (!newChars)
632 return nullptr;
633
634 PodCopy(newChars.get(), chars, i);
635
636 for (; i < length; i++) {
637 char16_t c = unicode::ToLowerCase(chars[i]);
638 MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
639 newChars[i] = c;
640 }
641
642 newChars[length] = 0;
643 }
644
645 JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
646 if (!res)
647 return nullptr;
648
649 newChars.release();
650 return res;
651 }
652
653 static inline bool
ToLowerCaseHelper(JSContext * cx,CallReceiver call)654 ToLowerCaseHelper(JSContext* cx, CallReceiver call)
655 {
656 RootedString str(cx, ThisToStringForStringProto(cx, call));
657 if (!str)
658 return false;
659
660 JSLinearString* linear = str->ensureLinear(cx);
661 if (!linear)
662 return false;
663
664 if (linear->hasLatin1Chars())
665 str = ToLowerCase<Latin1Char>(cx, linear);
666 else
667 str = ToLowerCase<char16_t>(cx, linear);
668 if (!str)
669 return false;
670
671 call.rval().setString(str);
672 return true;
673 }
674
675 bool
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)676 js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
677 {
678 return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
679 }
680
681 static bool
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)682 str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
683 {
684 CallArgs args = CallArgsFromVp(argc, vp);
685
686 /*
687 * Forcefully ignore the first (or any) argument and return toLowerCase(),
688 * ECMA has reserved that argument, presumably for defining the locale.
689 */
690 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
691 RootedString str(cx, ThisToStringForStringProto(cx, args));
692 if (!str)
693 return false;
694
695 RootedValue result(cx);
696 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
697 return false;
698
699 args.rval().set(result);
700 return true;
701 }
702
703 return ToLowerCaseHelper(cx, args);
704 }
705
706 template <typename DestChar, typename SrcChar>
707 static void
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t firstLowerCase,size_t length)708 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
709 {
710 MOZ_ASSERT(firstLowerCase < length);
711
712 for (size_t i = 0; i < firstLowerCase; i++)
713 destChars[i] = srcChars[i];
714
715 for (size_t i = firstLowerCase; i < length; i++) {
716 char16_t c = unicode::ToUpperCase(srcChars[i]);
717 MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
718 destChars[i] = c;
719 }
720
721 destChars[length] = '\0';
722 }
723
724 template <typename CharT>
725 static JSString*
ToUpperCase(JSContext * cx,JSLinearString * str)726 ToUpperCase(JSContext* cx, JSLinearString* str)
727 {
728 typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
729 typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
730
731 mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
732 size_t length = str->length();
733 {
734 AutoCheckCannotGC nogc;
735 const CharT* chars = str->chars<CharT>(nogc);
736
737 // Look for the first lower case character.
738 size_t i = 0;
739 for (; i < length; i++) {
740 char16_t c = chars[i];
741 if (unicode::CanUpperCase(c))
742 break;
743 }
744
745 // If all characters are upper case, return the input string.
746 if (i == length)
747 return str;
748
749 // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
750 // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
751 // upper case characters are not in the Latin1 range.
752 bool resultIsLatin1;
753 if (IsSame<CharT, Latin1Char>::value) {
754 resultIsLatin1 = true;
755 for (size_t j = i; j < length; j++) {
756 Latin1Char c = chars[j];
757 if (c == 0xb5 || c == 0xff) {
758 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
759 resultIsLatin1 = false;
760 break;
761 } else {
762 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
763 }
764 }
765 } else {
766 resultIsLatin1 = false;
767 }
768
769 if (resultIsLatin1) {
770 Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
771 if (!buf)
772 return nullptr;
773
774 ToUpperCaseImpl(buf.get(), chars, i, length);
775 newChars.construct<Latin1CharPtr>(Move(buf));
776 } else {
777 TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
778 if (!buf)
779 return nullptr;
780
781 ToUpperCaseImpl(buf.get(), chars, i, length);
782 newChars.construct<TwoByteCharPtr>(Move(buf));
783 }
784 }
785
786 JSString* res;
787 if (newChars.constructed<Latin1CharPtr>()) {
788 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
789 if (!res)
790 return nullptr;
791
792 newChars.ref<Latin1CharPtr>().release();
793 } else {
794 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
795 if (!res)
796 return nullptr;
797
798 newChars.ref<TwoByteCharPtr>().release();
799 }
800
801 return res;
802 }
803
804 static bool
ToUpperCaseHelper(JSContext * cx,CallReceiver call)805 ToUpperCaseHelper(JSContext* cx, CallReceiver call)
806 {
807 RootedString str(cx, ThisToStringForStringProto(cx, call));
808 if (!str)
809 return false;
810
811 JSLinearString* linear = str->ensureLinear(cx);
812 if (!linear)
813 return false;
814
815 if (linear->hasLatin1Chars())
816 str = ToUpperCase<Latin1Char>(cx, linear);
817 else
818 str = ToUpperCase<char16_t>(cx, linear);
819 if (!str)
820 return false;
821
822 call.rval().setString(str);
823 return true;
824 }
825
826 bool
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)827 js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
828 {
829 return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
830 }
831
832 static bool
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)833 str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
834 {
835 CallArgs args = CallArgsFromVp(argc, vp);
836
837 /*
838 * Forcefully ignore the first (or any) argument and return toUpperCase(),
839 * ECMA has reserved that argument, presumably for defining the locale.
840 */
841 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
842 RootedString str(cx, ThisToStringForStringProto(cx, args));
843 if (!str)
844 return false;
845
846 RootedValue result(cx);
847 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
848 return false;
849
850 args.rval().set(result);
851 return true;
852 }
853
854 return ToUpperCaseHelper(cx, args);
855 }
856
857 #if !EXPOSE_INTL_API
858 static bool
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)859 str_localeCompare(JSContext* cx, unsigned argc, Value* vp)
860 {
861 CallArgs args = CallArgsFromVp(argc, vp);
862 RootedString str(cx, ThisToStringForStringProto(cx, args));
863 if (!str)
864 return false;
865
866 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
867 if (!thatStr)
868 return false;
869
870 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
871 RootedValue result(cx);
872 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
873 return false;
874
875 args.rval().set(result);
876 return true;
877 }
878
879 int32_t result;
880 if (!CompareStrings(cx, str, thatStr, &result))
881 return false;
882
883 args.rval().setInt32(result);
884 return true;
885 }
886 #endif
887
888 #if EXPOSE_INTL_API
889 /* ES6 20140210 draft 21.1.3.12. */
890 static bool
str_normalize(JSContext * cx,unsigned argc,Value * vp)891 str_normalize(JSContext* cx, unsigned argc, Value* vp)
892 {
893 CallArgs args = CallArgsFromVp(argc, vp);
894
895 // Steps 1-3.
896 RootedString str(cx, ThisToStringForStringProto(cx, args));
897 if (!str)
898 return false;
899
900 // Step 4.
901 UNormalizationMode form;
902 if (!args.hasDefined(0)) {
903 form = UNORM_NFC;
904 } else {
905 // Steps 5-6.
906 RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
907 if (!formStr)
908 return false;
909
910 // Step 7.
911 if (EqualStrings(formStr, cx->names().NFC)) {
912 form = UNORM_NFC;
913 } else if (EqualStrings(formStr, cx->names().NFD)) {
914 form = UNORM_NFD;
915 } else if (EqualStrings(formStr, cx->names().NFKC)) {
916 form = UNORM_NFKC;
917 } else if (EqualStrings(formStr, cx->names().NFKD)) {
918 form = UNORM_NFKD;
919 } else {
920 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr,
921 JSMSG_INVALID_NORMALIZE_FORM);
922 return false;
923 }
924 }
925
926 // Step 8.
927 AutoStableStringChars stableChars(cx);
928 if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
929 return false;
930
931 static const size_t INLINE_CAPACITY = 32;
932
933 const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().start().get());
934 int32_t srcLen = AssertedCast<int32_t>(str->length());
935 Vector<char16_t, INLINE_CAPACITY> chars(cx);
936 if (!chars.resize(INLINE_CAPACITY))
937 return false;
938
939 UErrorCode status = U_ZERO_ERROR;
940 int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
941 Char16ToUChar(chars.begin()), INLINE_CAPACITY,
942 &status);
943 if (status == U_BUFFER_OVERFLOW_ERROR) {
944 if (!chars.resize(size))
945 return false;
946 status = U_ZERO_ERROR;
947 #ifdef DEBUG
948 int32_t finalSize =
949 #endif
950 unorm_normalize(srcChars, srcLen, form, 0,
951 Char16ToUChar(chars.begin()), size,
952 &status);
953 MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
954 }
955 if (U_FAILURE(status))
956 return false;
957
958 JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
959 if (!ns)
960 return false;
961
962 // Step 9.
963 args.rval().setString(ns);
964 return true;
965 }
966 #endif
967
968 bool
str_charAt(JSContext * cx,unsigned argc,Value * vp)969 js::str_charAt(JSContext* cx, unsigned argc, Value* vp)
970 {
971 CallArgs args = CallArgsFromVp(argc, vp);
972
973 RootedString str(cx);
974 size_t i;
975 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
976 str = args.thisv().toString();
977 i = size_t(args[0].toInt32());
978 if (i >= str->length())
979 goto out_of_range;
980 } else {
981 str = ThisToStringForStringProto(cx, args);
982 if (!str)
983 return false;
984
985 double d = 0.0;
986 if (args.length() > 0 && !ToInteger(cx, args[0], &d))
987 return false;
988
989 if (d < 0 || str->length() <= d)
990 goto out_of_range;
991 i = size_t(d);
992 }
993
994 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
995 if (!str)
996 return false;
997 args.rval().setString(str);
998 return true;
999
1000 out_of_range:
1001 args.rval().setString(cx->runtime()->emptyString);
1002 return true;
1003 }
1004
1005 bool
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1006 js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res)
1007 {
1008 RootedString str(cx);
1009 size_t i;
1010 if (index.isInt32()) {
1011 i = index.toInt32();
1012 if (i >= string->length())
1013 goto out_of_range;
1014 } else {
1015 double d = 0.0;
1016 if (!ToInteger(cx, index, &d))
1017 return false;
1018 // check whether d is negative as size_t is unsigned
1019 if (d < 0 || string->length() <= d )
1020 goto out_of_range;
1021 i = size_t(d);
1022 }
1023 char16_t c;
1024 if (!string->getChar(cx, i , &c))
1025 return false;
1026 res.setInt32(c);
1027 return true;
1028
1029 out_of_range:
1030 res.setNaN();
1031 return true;
1032 }
1033
1034 bool
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1035 js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp)
1036 {
1037 CallArgs args = CallArgsFromVp(argc, vp);
1038 RootedString str(cx);
1039 RootedValue index(cx);
1040 if (args.thisv().isString()) {
1041 str = args.thisv().toString();
1042 } else {
1043 str = ThisToStringForStringProto(cx, args);
1044 if (!str)
1045 return false;
1046 }
1047 if (args.length() != 0)
1048 index = args[0];
1049 else
1050 index.setInt32(0);
1051
1052 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1053 }
1054
1055 /*
1056 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1057 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1058 *
1059 * Return the index of pat in text, or -1 if not found.
1060 */
1061 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1062 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1063 static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
1064
1065 template <typename TextChar, typename PatChar>
1066 static int
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1067 BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1068 {
1069 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1070
1071 uint8_t skip[sBMHCharSetSize];
1072 for (uint32_t i = 0; i < sBMHCharSetSize; i++)
1073 skip[i] = uint8_t(patLen);
1074
1075 uint32_t patLast = patLen - 1;
1076 for (uint32_t i = 0; i < patLast; i++) {
1077 char16_t c = pat[i];
1078 if (c >= sBMHCharSetSize)
1079 return sBMHBadPattern;
1080 skip[c] = uint8_t(patLast - i);
1081 }
1082
1083 for (uint32_t k = patLast; k < textLen; ) {
1084 for (uint32_t i = k, j = patLast; ; i--, j--) {
1085 if (text[i] != pat[j])
1086 break;
1087 if (j == 0)
1088 return static_cast<int>(i); /* safe: max string size */
1089 }
1090
1091 char16_t c = text[k];
1092 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1093 }
1094 return -1;
1095 }
1096
1097 template <typename TextChar, typename PatChar>
1098 struct MemCmp {
1099 typedef uint32_t Extent;
computeExtentMemCmp1100 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) {
1101 return (patLen - 1) * sizeof(PatChar);
1102 }
matchMemCmp1103 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1104 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1105 return memcmp(p, t, extent) == 0;
1106 }
1107 };
1108
1109 template <typename TextChar, typename PatChar>
1110 struct ManualCmp {
1111 typedef const PatChar* Extent;
computeExtentManualCmp1112 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) {
1113 return pat + patLen;
1114 }
matchManualCmp1115 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1116 for (; p != extent; ++p, ++t) {
1117 if (*p != *t)
1118 return false;
1119 }
1120 return true;
1121 }
1122 };
1123
1124 template <typename TextChar, typename PatChar>
1125 static const TextChar*
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1126 FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat)
1127 {
1128 const TextChar* textend = text + n;
1129 const TextChar* t = text;
1130
1131 switch ((textend - t) & 7) {
1132 case 0: if (*t++ == pat) return t - 1;
1133 case 7: if (*t++ == pat) return t - 1;
1134 case 6: if (*t++ == pat) return t - 1;
1135 case 5: if (*t++ == pat) return t - 1;
1136 case 4: if (*t++ == pat) return t - 1;
1137 case 3: if (*t++ == pat) return t - 1;
1138 case 2: if (*t++ == pat) return t - 1;
1139 case 1: if (*t++ == pat) return t - 1;
1140 }
1141 while (textend != t) {
1142 if (t[0] == pat) return t;
1143 if (t[1] == pat) return t + 1;
1144 if (t[2] == pat) return t + 2;
1145 if (t[3] == pat) return t + 3;
1146 if (t[4] == pat) return t + 4;
1147 if (t[5] == pat) return t + 5;
1148 if (t[6] == pat) return t + 6;
1149 if (t[7] == pat) return t + 7;
1150 t += 8;
1151 }
1152 return nullptr;
1153 }
1154
1155 static const char*
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1156 FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
1157 {
1158 #if defined(__clang__)
1159 return FirstCharMatcherUnrolled<char, char>(text, n, pat);
1160 #else
1161 return reinterpret_cast<const char*>(memchr(text, pat, n));
1162 #endif
1163 }
1164
1165 static const char16_t*
FirstCharMatcher16bit(const char16_t * text,uint32_t n,const char16_t pat)1166 FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat)
1167 {
1168 #if defined(XP_DARWIN) || defined(XP_WIN)
1169 /*
1170 * Performance of memchr is horrible in OSX. Windows is better,
1171 * but it is still better to use UnrolledMatcher.
1172 */
1173 return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat);
1174 #else
1175 /*
1176 * For linux the best performance is obtained by slightly hacking memchr.
1177 * memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t
1178 * in blocks of 8bit and use memchr.
1179 */
1180
1181 const char* text8 = (const char*) text;
1182 const char* pat8 = reinterpret_cast<const char*>(&pat);
1183
1184 MOZ_ASSERT(n < UINT32_MAX/2);
1185 n *= 2;
1186
1187 uint32_t i = 0;
1188 while (i < n) {
1189 /* Find the first 8 bits of 16bit character in text. */
1190 const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]);
1191 if (pos8 == nullptr)
1192 return nullptr;
1193 i = static_cast<uint32_t>(pos8 - text8);
1194
1195 /* Incorrect match if it matches the last 8 bits of 16bit char. */
1196 if (i % 2 != 0) {
1197 i++;
1198 continue;
1199 }
1200
1201 /* Test if last 8 bits match last 8 bits of 16bit char. */
1202 if (pat8[1] == text8[i + 1])
1203 return (text + (i/2));
1204
1205 i += 2;
1206 }
1207 return nullptr;
1208 #endif
1209 }
1210
1211 template <class InnerMatch, typename TextChar, typename PatChar>
1212 static int
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1213 Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
1214 {
1215 const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
1216
1217 uint32_t i = 0;
1218 uint32_t n = textlen - patlen + 1;
1219 while (i < n) {
1220 const TextChar* pos;
1221
1222 if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
1223 pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]);
1224 else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
1225 pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
1226 else
1227 pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
1228
1229 if (pos == nullptr)
1230 return -1;
1231
1232 i = static_cast<uint32_t>(pos - text);
1233 if (InnerMatch::match(pat + 1, text + i + 1, extent))
1234 return i;
1235
1236 i += 1;
1237 }
1238 return -1;
1239 }
1240
1241
1242 template <typename TextChar, typename PatChar>
1243 static MOZ_ALWAYS_INLINE int
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1244 StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1245 {
1246 if (patLen == 0)
1247 return 0;
1248 if (textLen < patLen)
1249 return -1;
1250
1251 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1252 /*
1253 * Given enough registers, the unrolled loop below is faster than the
1254 * following loop. 32-bit x86 does not have enough registers.
1255 */
1256 if (patLen == 1) {
1257 const PatChar p0 = *pat;
1258 const TextChar* end = text + textLen;
1259 for (const TextChar* c = text; c != end; ++c) {
1260 if (*c == p0)
1261 return c - text;
1262 }
1263 return -1;
1264 }
1265 #endif
1266
1267 /*
1268 * If the text or pattern string is short, BMH will be more expensive than
1269 * the basic linear scan due to initialization cost and a more complex loop
1270 * body. While the correct threshold is input-dependent, we can make a few
1271 * conservative observations:
1272 * - When |textLen| is "big enough", the initialization time will be
1273 * proportionally small, so the worst-case slowdown is minimized.
1274 * - When |patLen| is "too small", even the best case for BMH will be
1275 * slower than a simple scan for large |textLen| due to the more complex
1276 * loop body of BMH.
1277 * From this, the values for "big enough" and "too small" are determined
1278 * empirically. See bug 526348.
1279 */
1280 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1281 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1282 if (index != sBMHBadPattern)
1283 return index;
1284 }
1285
1286 /*
1287 * For big patterns with large potential overlap we want the SIMD-optimized
1288 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1289 * use memcmp if one of the strings is TwoByte and the other is Latin1.
1290 *
1291 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1292 */
1293 return
1294 #if !defined(__linux__)
1295 (patLen > 128 && IsSame<TextChar, PatChar>::value)
1296 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
1297 :
1298 #endif
1299 Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen);
1300 }
1301
1302 static int32_t
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1303 StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0)
1304 {
1305 MOZ_ASSERT(start <= text->length());
1306 uint32_t textLen = text->length() - start;
1307 uint32_t patLen = pat->length();
1308
1309 int match;
1310 AutoCheckCannotGC nogc;
1311 if (text->hasLatin1Chars()) {
1312 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1313 if (pat->hasLatin1Chars())
1314 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1315 else
1316 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1317 } else {
1318 const char16_t* textChars = text->twoByteChars(nogc) + start;
1319 if (pat->hasLatin1Chars())
1320 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1321 else
1322 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1323 }
1324
1325 return (match == -1) ? -1 : start + match;
1326 }
1327
1328 static const size_t sRopeMatchThresholdRatioLog2 = 5;
1329
1330 bool
StringHasPattern(JSLinearString * text,const char16_t * pat,uint32_t patLen)1331 js::StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patLen)
1332 {
1333 AutoCheckCannotGC nogc;
1334 return text->hasLatin1Chars()
1335 ? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1
1336 : StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1;
1337 }
1338
1339 int
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1340 js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start)
1341 {
1342 return StringMatch(text, pat, start);
1343 }
1344
1345 // When an algorithm does not need a string represented as a single linear
1346 // array of characters, this range utility may be used to traverse the string a
1347 // sequence of linear arrays of characters. This avoids flattening ropes.
1348 class StringSegmentRange
1349 {
1350 // If malloc() shows up in any profiles from this vector, we can add a new
1351 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1352 Rooted<StringVector> stack;
1353 RootedLinearString cur;
1354
settle(JSString * str)1355 bool settle(JSString* str) {
1356 while (str->isRope()) {
1357 JSRope& rope = str->asRope();
1358 if (!stack.append(rope.rightChild()))
1359 return false;
1360 str = rope.leftChild();
1361 }
1362 cur = &str->asLinear();
1363 return true;
1364 }
1365
1366 public:
StringSegmentRange(JSContext * cx)1367 explicit StringSegmentRange(JSContext* cx)
1368 : stack(cx, StringVector(cx)), cur(cx)
1369 {}
1370
init(JSString * str)1371 MOZ_WARN_UNUSED_RESULT bool init(JSString* str) {
1372 MOZ_ASSERT(stack.empty());
1373 return settle(str);
1374 }
1375
empty() const1376 bool empty() const {
1377 return cur == nullptr;
1378 }
1379
front() const1380 JSLinearString* front() const {
1381 MOZ_ASSERT(!cur->isRope());
1382 return cur;
1383 }
1384
popFront()1385 MOZ_WARN_UNUSED_RESULT bool popFront() {
1386 MOZ_ASSERT(!empty());
1387 if (stack.empty()) {
1388 cur = nullptr;
1389 return true;
1390 }
1391 return settle(stack.popCopy());
1392 }
1393 };
1394
1395 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1396
1397 template <typename TextChar, typename PatChar>
1398 static int
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1399 RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings,
1400 const PatChar* pat, size_t patLen)
1401 {
1402 /* Absolute offset from the beginning of the logical text string. */
1403 int pos = 0;
1404
1405 for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) {
1406 /* Try to find a match within 'outer'. */
1407 JSLinearString* outer = *outerp;
1408 const TextChar* chars = outer->chars<TextChar>(nogc);
1409 size_t len = outer->length();
1410 int matchResult = StringMatch(chars, len, pat, patLen);
1411 if (matchResult != -1) {
1412 /* Matched! */
1413 return pos + matchResult;
1414 }
1415
1416 /* Try to find a match starting in 'outer' and running into other nodes. */
1417 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1418 const TextChar* const textend = chars + len;
1419 const PatChar p0 = *pat;
1420 const PatChar* const p1 = pat + 1;
1421 const PatChar* const patend = pat + patLen;
1422 for (const TextChar* t = text; t != textend; ) {
1423 if (*t++ != p0)
1424 continue;
1425
1426 JSLinearString** innerp = outerp;
1427 const TextChar* ttend = textend;
1428 const TextChar* tt = t;
1429 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1430 while (tt == ttend) {
1431 if (++innerp == strings.end())
1432 return -1;
1433
1434 JSLinearString* inner = *innerp;
1435 tt = inner->chars<TextChar>(nogc);
1436 ttend = tt + inner->length();
1437 }
1438 if (*pp != *tt)
1439 goto break_continue;
1440 }
1441
1442 /* Matched! */
1443 return pos + (t - chars) - 1; /* -1 because of *t++ above */
1444
1445 break_continue:;
1446 }
1447
1448 pos += len;
1449 }
1450
1451 return -1;
1452 }
1453
1454 /*
1455 * RopeMatch takes the text to search and the pattern to search for in the text.
1456 * RopeMatch returns false on OOM and otherwise returns the match index through
1457 * the 'match' outparam (-1 for not found).
1458 */
1459 static bool
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)1460 RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
1461 {
1462 uint32_t patLen = pat->length();
1463 if (patLen == 0) {
1464 *match = 0;
1465 return true;
1466 }
1467 if (text->length() < patLen) {
1468 *match = -1;
1469 return true;
1470 }
1471
1472 /*
1473 * List of leaf nodes in the rope. If we run out of memory when trying to
1474 * append to this list, we can still fall back to StringMatch, so use the
1475 * system allocator so we don't report OOM in that case.
1476 */
1477 LinearStringVector strings;
1478
1479 /*
1480 * We don't want to do rope matching if there is a poor node-to-char ratio,
1481 * since this means spending a lot of time in the match loop below. We also
1482 * need to build the list of leaf nodes. Do both here: iterate over the
1483 * nodes so long as there are not too many.
1484 *
1485 * We also don't use rope matching if the rope contains both Latin1 and
1486 * TwoByte nodes, to simplify the match algorithm.
1487 */
1488 {
1489 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1490 StringSegmentRange r(cx);
1491 if (!r.init(text))
1492 return false;
1493
1494 bool textIsLatin1 = text->hasLatin1Chars();
1495 while (!r.empty()) {
1496 if (threshold-- == 0 ||
1497 r.front()->hasLatin1Chars() != textIsLatin1 ||
1498 !strings.append(r.front()))
1499 {
1500 JSLinearString* linear = text->ensureLinear(cx);
1501 if (!linear)
1502 return false;
1503
1504 *match = StringMatch(linear, pat);
1505 return true;
1506 }
1507 if (!r.popFront())
1508 return false;
1509 }
1510 }
1511
1512 AutoCheckCannotGC nogc;
1513 if (text->hasLatin1Chars()) {
1514 if (pat->hasLatin1Chars())
1515 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen);
1516 else
1517 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen);
1518 } else {
1519 if (pat->hasLatin1Chars())
1520 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), patLen);
1521 else
1522 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), patLen);
1523 }
1524
1525 return true;
1526 }
1527
1528 /* ES6 draft rc4 21.1.3.7. */
1529 static bool
str_includes(JSContext * cx,unsigned argc,Value * vp)1530 str_includes(JSContext* cx, unsigned argc, Value* vp)
1531 {
1532 CallArgs args = CallArgsFromVp(argc, vp);
1533
1534 // Steps 1, 2, and 3
1535 RootedString str(cx, ThisToStringForStringProto(cx, args));
1536 if (!str)
1537 return false;
1538
1539 // Steps 4 and 5
1540 bool isRegExp;
1541 if (!IsRegExp(cx, args.get(0), &isRegExp))
1542 return false;
1543
1544 // Step 6
1545 if (isRegExp) {
1546 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1547 "first", "", "Regular Expression");
1548 return false;
1549 }
1550
1551 // Steps 7 and 8
1552 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1553 if (!searchStr)
1554 return false;
1555
1556 // Steps 9 and 10
1557 uint32_t pos = 0;
1558 if (args.hasDefined(1)) {
1559 if (args[1].isInt32()) {
1560 int i = args[1].toInt32();
1561 pos = (i < 0) ? 0U : uint32_t(i);
1562 } else {
1563 double d;
1564 if (!ToInteger(cx, args[1], &d))
1565 return false;
1566 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1567 }
1568 }
1569
1570 // Step 11
1571 uint32_t textLen = str->length();
1572
1573 // Step 12
1574 uint32_t start = Min(Max(pos, 0U), textLen);
1575
1576 // Steps 13 and 14
1577 JSLinearString* text = str->ensureLinear(cx);
1578 if (!text)
1579 return false;
1580
1581 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
1582 return true;
1583 }
1584
1585 /* TODO: remove String.prototype.contains (bug 1103588) */
1586 static bool
str_contains(JSContext * cx,unsigned argc,Value * vp)1587 str_contains(JSContext *cx, unsigned argc, Value *vp)
1588 {
1589 #ifndef RELEASE_BUILD
1590 CallArgs args = CallArgsFromVp(argc, vp);
1591 RootedObject callee(cx, &args.callee());
1592 if (!GlobalObject::warnOnceAboutStringContains(cx, callee))
1593 return false;
1594 #endif
1595 return str_includes(cx, argc, vp);
1596 }
1597
1598 /* ES6 20120927 draft 15.5.4.7. */
1599 bool
str_indexOf(JSContext * cx,unsigned argc,Value * vp)1600 js::str_indexOf(JSContext* cx, unsigned argc, Value* vp)
1601 {
1602 CallArgs args = CallArgsFromVp(argc, vp);
1603
1604 // Steps 1, 2, and 3
1605 RootedString str(cx, ThisToStringForStringProto(cx, args));
1606 if (!str)
1607 return false;
1608
1609 // Steps 4 and 5
1610 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1611 if (!searchStr)
1612 return false;
1613
1614 // Steps 6 and 7
1615 uint32_t pos = 0;
1616 if (args.hasDefined(1)) {
1617 if (args[1].isInt32()) {
1618 int i = args[1].toInt32();
1619 pos = (i < 0) ? 0U : uint32_t(i);
1620 } else {
1621 double d;
1622 if (!ToInteger(cx, args[1], &d))
1623 return false;
1624 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1625 }
1626 }
1627
1628 // Step 8
1629 uint32_t textLen = str->length();
1630
1631 // Step 9
1632 uint32_t start = Min(Max(pos, 0U), textLen);
1633
1634 // Steps 10 and 11
1635 JSLinearString* text = str->ensureLinear(cx);
1636 if (!text)
1637 return false;
1638
1639 args.rval().setInt32(StringMatch(text, searchStr, start));
1640 return true;
1641 }
1642
1643 template <typename TextChar, typename PatChar>
1644 static int32_t
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)1645 LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
1646 size_t start)
1647 {
1648 MOZ_ASSERT(patLen > 0);
1649 MOZ_ASSERT(patLen <= textLen);
1650 MOZ_ASSERT(start <= textLen - patLen);
1651
1652 const PatChar p0 = *pat;
1653 const PatChar* patNext = pat + 1;
1654 const PatChar* patEnd = pat + patLen;
1655
1656 for (const TextChar* t = text + start; t >= text; --t) {
1657 if (*t == p0) {
1658 const TextChar* t1 = t + 1;
1659 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
1660 if (*t1 != *p1)
1661 goto break_continue;
1662 }
1663
1664 return static_cast<int32_t>(t - text);
1665 }
1666 break_continue:;
1667 }
1668
1669 return -1;
1670 }
1671
1672 bool
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)1673 js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
1674 {
1675 CallArgs args = CallArgsFromVp(argc, vp);
1676 RootedString textstr(cx, ThisToStringForStringProto(cx, args));
1677 if (!textstr)
1678 return false;
1679
1680 RootedLinearString pat(cx, ArgToRootedString(cx, args, 0));
1681 if (!pat)
1682 return false;
1683
1684 size_t textLen = textstr->length();
1685 size_t patLen = pat->length();
1686 int start = textLen - patLen; // Start searching here
1687 if (start < 0) {
1688 args.rval().setInt32(-1);
1689 return true;
1690 }
1691
1692 if (args.hasDefined(1)) {
1693 if (args[1].isInt32()) {
1694 int i = args[1].toInt32();
1695 if (i <= 0)
1696 start = 0;
1697 else if (i < start)
1698 start = i;
1699 } else {
1700 double d;
1701 if (!ToNumber(cx, args[1], &d))
1702 return false;
1703 if (!IsNaN(d)) {
1704 d = JS::ToInteger(d);
1705 if (d <= 0)
1706 start = 0;
1707 else if (d < start)
1708 start = int(d);
1709 }
1710 }
1711 }
1712
1713 if (patLen == 0) {
1714 args.rval().setInt32(start);
1715 return true;
1716 }
1717
1718 JSLinearString* text = textstr->ensureLinear(cx);
1719 if (!text)
1720 return false;
1721
1722 int32_t res;
1723 AutoCheckCannotGC nogc;
1724 if (text->hasLatin1Chars()) {
1725 const Latin1Char* textChars = text->latin1Chars(nogc);
1726 if (pat->hasLatin1Chars())
1727 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1728 else
1729 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1730 } else {
1731 const char16_t* textChars = text->twoByteChars(nogc);
1732 if (pat->hasLatin1Chars())
1733 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1734 else
1735 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1736 }
1737
1738 args.rval().setInt32(res);
1739 return true;
1740 }
1741
1742 bool
HasSubstringAt(JSLinearString * text,JSLinearString * pat,size_t start)1743 js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
1744 {
1745 MOZ_ASSERT(start + pat->length() <= text->length());
1746
1747 size_t patLen = pat->length();
1748
1749 AutoCheckCannotGC nogc;
1750 if (text->hasLatin1Chars()) {
1751 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1752 if (pat->hasLatin1Chars())
1753 return PodEqual(textChars, pat->latin1Chars(nogc), patLen);
1754
1755 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1756 }
1757
1758 const char16_t* textChars = text->twoByteChars(nogc) + start;
1759 if (pat->hasTwoByteChars())
1760 return PodEqual(textChars, pat->twoByteChars(nogc), patLen);
1761
1762 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1763 }
1764
1765 /* ES6 draft rc3 21.1.3.18. */
1766 bool
str_startsWith(JSContext * cx,unsigned argc,Value * vp)1767 js::str_startsWith(JSContext* cx, unsigned argc, Value* vp)
1768 {
1769 CallArgs args = CallArgsFromVp(argc, vp);
1770
1771 // Steps 1, 2, and 3
1772 RootedString str(cx, ThisToStringForStringProto(cx, args));
1773 if (!str)
1774 return false;
1775
1776 // Steps 4 and 5
1777 bool isRegExp;
1778 if (!IsRegExp(cx, args.get(0), &isRegExp))
1779 return false;
1780
1781 // Step 6
1782 if (isRegExp) {
1783 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1784 "first", "", "Regular Expression");
1785 return false;
1786 }
1787
1788 // Steps 7 and 8
1789 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1790 if (!searchStr)
1791 return false;
1792
1793 // Steps 9 and 10
1794 uint32_t pos = 0;
1795 if (args.hasDefined(1)) {
1796 if (args[1].isInt32()) {
1797 int i = args[1].toInt32();
1798 pos = (i < 0) ? 0U : uint32_t(i);
1799 } else {
1800 double d;
1801 if (!ToInteger(cx, args[1], &d))
1802 return false;
1803 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1804 }
1805 }
1806
1807 // Step 11
1808 uint32_t textLen = str->length();
1809
1810 // Step 12
1811 uint32_t start = Min(Max(pos, 0U), textLen);
1812
1813 // Step 13
1814 uint32_t searchLen = searchStr->length();
1815
1816 // Step 14
1817 if (searchLen + start < searchLen || searchLen + start > textLen) {
1818 args.rval().setBoolean(false);
1819 return true;
1820 }
1821
1822 // Steps 15 and 16
1823 JSLinearString* text = str->ensureLinear(cx);
1824 if (!text)
1825 return false;
1826
1827 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1828 return true;
1829 }
1830
1831 /* ES6 draft rc3 21.1.3.6. */
1832 static bool
str_endsWith(JSContext * cx,unsigned argc,Value * vp)1833 str_endsWith(JSContext* cx, unsigned argc, Value* vp)
1834 {
1835 CallArgs args = CallArgsFromVp(argc, vp);
1836
1837 // Steps 1, 2, and 3
1838 RootedString str(cx, ThisToStringForStringProto(cx, args));
1839 if (!str)
1840 return false;
1841
1842 // Steps 4 and 5
1843 bool isRegExp;
1844 if (!IsRegExp(cx, args.get(0), &isRegExp))
1845 return false;
1846
1847 // Step 6
1848 if (isRegExp) {
1849 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1850 "first", "", "Regular Expression");
1851 return false;
1852 }
1853
1854 // Steps 7 and 8
1855 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1856 if (!searchStr)
1857 return false;
1858
1859 // Step 9
1860 uint32_t textLen = str->length();
1861
1862 // Steps 10 and 11
1863 uint32_t pos = textLen;
1864 if (args.hasDefined(1)) {
1865 if (args[1].isInt32()) {
1866 int i = args[1].toInt32();
1867 pos = (i < 0) ? 0U : uint32_t(i);
1868 } else {
1869 double d;
1870 if (!ToInteger(cx, args[1], &d))
1871 return false;
1872 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1873 }
1874 }
1875
1876 // Step 12
1877 uint32_t end = Min(Max(pos, 0U), textLen);
1878
1879 // Step 13
1880 uint32_t searchLen = searchStr->length();
1881
1882 // Step 15 (reordered)
1883 if (searchLen > end) {
1884 args.rval().setBoolean(false);
1885 return true;
1886 }
1887
1888 // Step 14
1889 uint32_t start = end - searchLen;
1890
1891 // Steps 16 and 17
1892 JSLinearString* text = str->ensureLinear(cx);
1893 if (!text)
1894 return false;
1895
1896 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1897 return true;
1898 }
1899
1900 template <typename CharT>
1901 static void
TrimString(const CharT * chars,bool trimLeft,bool trimRight,size_t length,size_t * pBegin,size_t * pEnd)1902 TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length,
1903 size_t* pBegin, size_t* pEnd)
1904 {
1905 size_t begin = 0, end = length;
1906
1907 if (trimLeft) {
1908 while (begin < length && unicode::IsSpace(chars[begin]))
1909 ++begin;
1910 }
1911
1912 if (trimRight) {
1913 while (end > begin && unicode::IsSpace(chars[end - 1]))
1914 --end;
1915 }
1916
1917 *pBegin = begin;
1918 *pEnd = end;
1919 }
1920
1921 static bool
TrimString(JSContext * cx,Value * vp,bool trimLeft,bool trimRight)1922 TrimString(JSContext* cx, Value* vp, bool trimLeft, bool trimRight)
1923 {
1924 CallReceiver call = CallReceiverFromVp(vp);
1925 RootedString str(cx, ThisToStringForStringProto(cx, call));
1926 if (!str)
1927 return false;
1928
1929 JSLinearString* linear = str->ensureLinear(cx);
1930 if (!linear)
1931 return false;
1932
1933 size_t length = linear->length();
1934 size_t begin, end;
1935 if (linear->hasLatin1Chars()) {
1936 AutoCheckCannotGC nogc;
1937 TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end);
1938 } else {
1939 AutoCheckCannotGC nogc;
1940 TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end);
1941 }
1942
1943 str = NewDependentString(cx, str, begin, end - begin);
1944 if (!str)
1945 return false;
1946
1947 call.rval().setString(str);
1948 return true;
1949 }
1950
1951 static bool
str_trim(JSContext * cx,unsigned argc,Value * vp)1952 str_trim(JSContext* cx, unsigned argc, Value* vp)
1953 {
1954 return TrimString(cx, vp, true, true);
1955 }
1956
1957 static bool
str_trimLeft(JSContext * cx,unsigned argc,Value * vp)1958 str_trimLeft(JSContext* cx, unsigned argc, Value* vp)
1959 {
1960 return TrimString(cx, vp, true, false);
1961 }
1962
1963 static bool
str_trimRight(JSContext * cx,unsigned argc,Value * vp)1964 str_trimRight(JSContext* cx, unsigned argc, Value* vp)
1965 {
1966 return TrimString(cx, vp, false, true);
1967 }
1968
1969 /*
1970 * Perl-inspired string functions.
1971 */
1972
1973 namespace {
1974
1975 /* Result of a successfully performed flat match. */
1976 class FlatMatch
1977 {
1978 RootedAtom pat_;
1979 int32_t match_;
1980
1981 friend class StringRegExpGuard;
1982
1983 public:
FlatMatch(JSContext * cx)1984 explicit FlatMatch(JSContext* cx) : pat_(cx) {}
pattern() const1985 JSLinearString* pattern() const { return pat_; }
patternLength() const1986 size_t patternLength() const { return pat_->length(); }
1987
1988 /*
1989 * Note: The match is -1 when the match is performed successfully,
1990 * but no match is found.
1991 */
match() const1992 int32_t match() const { return match_; }
1993 };
1994
1995 } /* anonymous namespace */
1996
1997 static inline bool
IsRegExpMetaChar(char16_t c)1998 IsRegExpMetaChar(char16_t c)
1999 {
2000 switch (c) {
2001 /* Taken from the PatternCharacter production in 15.10.1. */
2002 case '^': case '$': case '\\': case '.': case '*': case '+':
2003 case '?': case '(': case ')': case '[': case ']': case '{':
2004 case '}': case '|':
2005 return true;
2006 default:
2007 return false;
2008 }
2009 }
2010
2011 template <typename CharT>
2012 bool
HasRegExpMetaChars(const CharT * chars,size_t length)2013 js::HasRegExpMetaChars(const CharT* chars, size_t length)
2014 {
2015 for (size_t i = 0; i < length; ++i) {
2016 if (IsRegExpMetaChar(chars[i]))
2017 return true;
2018 }
2019 return false;
2020 }
2021
2022 template bool
2023 js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars, size_t length);
2024
2025 template bool
2026 js::HasRegExpMetaChars<char16_t>(const char16_t* chars, size_t length);
2027
2028 bool
StringHasRegExpMetaChars(JSLinearString * str)2029 js::StringHasRegExpMetaChars(JSLinearString* str)
2030 {
2031 AutoCheckCannotGC nogc;
2032 if (str->hasLatin1Chars())
2033 return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
2034
2035 return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
2036 }
2037
2038 namespace {
2039
2040 /*
2041 * StringRegExpGuard factors logic out of String regexp operations.
2042 *
2043 * |optarg| indicates in which argument position RegExp flags will be found, if
2044 * present. This is a Mozilla extension and not part of any ECMA spec.
2045 */
2046 class MOZ_STACK_CLASS StringRegExpGuard
2047 {
2048 RegExpGuard re_;
2049 FlatMatch fm;
2050 RootedObject obj_;
2051
2052 /*
2053 * Upper bound on the number of characters we are willing to potentially
2054 * waste on searching for RegExp meta-characters.
2055 */
2056 static const size_t MAX_FLAT_PAT_LEN = 256;
2057
2058 template <typename CharT>
2059 static bool
flattenPattern(StringBuffer & sb,const CharT * chars,size_t len)2060 flattenPattern(StringBuffer& sb, const CharT* chars, size_t len)
2061 {
2062 static const char ESCAPE_CHAR = '\\';
2063 for (const CharT* it = chars; it < chars + len; ++it) {
2064 if (IsRegExpMetaChar(*it)) {
2065 if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
2066 return false;
2067 } else {
2068 if (!sb.append(*it))
2069 return false;
2070 }
2071 }
2072 return true;
2073 }
2074
2075 static JSAtom*
flattenPattern(JSContext * cx,JSAtom * pat)2076 flattenPattern(JSContext* cx, JSAtom* pat)
2077 {
2078 StringBuffer sb(cx);
2079 if (!sb.reserve(pat->length()))
2080 return nullptr;
2081
2082 if (pat->hasLatin1Chars()) {
2083 AutoCheckCannotGC nogc;
2084 if (!flattenPattern(sb, pat->latin1Chars(nogc), pat->length()))
2085 return nullptr;
2086 } else {
2087 AutoCheckCannotGC nogc;
2088 if (!flattenPattern(sb, pat->twoByteChars(nogc), pat->length()))
2089 return nullptr;
2090 }
2091
2092 return sb.finishAtom();
2093 }
2094
2095 public:
StringRegExpGuard(JSContext * cx)2096 explicit StringRegExpGuard(JSContext* cx)
2097 : re_(cx), fm(cx), obj_(cx)
2098 { }
2099
2100 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
init(JSContext * cx,const CallArgs & args,bool convertVoid=false)2101 bool init(JSContext* cx, const CallArgs& args, bool convertVoid = false)
2102 {
2103 if (args.length() != 0) {
2104 ESClassValue cls;
2105 if (!GetClassOfValue(cx, args[0], &cls))
2106 return false;
2107
2108 if (cls == ESClass_RegExp)
2109 return initRegExp(cx, &args[0].toObject());
2110 }
2111
2112 if (convertVoid && !args.hasDefined(0)) {
2113 fm.pat_ = cx->runtime()->emptyString;
2114 return true;
2115 }
2116
2117 JSString* arg = ArgToRootedString(cx, args, 0);
2118 if (!arg)
2119 return false;
2120
2121 fm.pat_ = AtomizeString(cx, arg);
2122 if (!fm.pat_)
2123 return false;
2124
2125 return true;
2126 }
2127
initRegExp(JSContext * cx,JSObject * regexp)2128 bool initRegExp(JSContext* cx, JSObject* regexp) {
2129 obj_ = regexp;
2130 return RegExpToShared(cx, obj_, &re_);
2131 }
2132
init(JSContext * cx,HandleString pattern)2133 bool init(JSContext* cx, HandleString pattern) {
2134 fm.pat_ = AtomizeString(cx, pattern);
2135 if (!fm.pat_)
2136 return false;
2137 return true;
2138 }
2139
2140 /*
2141 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2142 * the pattern string, or a lengthy pattern string can thwart this process.
2143 *
2144 * |checkMetaChars| looks for regexp metachars in the pattern string.
2145 *
2146 * Return whether flat matching could be used.
2147 *
2148 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2149 * cx->isExceptionPending().
2150 */
2151 const FlatMatch*
tryFlatMatch(JSContext * cx,JSString * text,unsigned optarg,unsigned argc,bool checkMetaChars=true)2152 tryFlatMatch(JSContext* cx, JSString* text, unsigned optarg, unsigned argc,
2153 bool checkMetaChars = true)
2154 {
2155 if (re_.initialized())
2156 return nullptr;
2157
2158 if (optarg < argc)
2159 return nullptr;
2160
2161 size_t patLen = fm.pat_->length();
2162 if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
2163 return nullptr;
2164
2165 /*
2166 * |text| could be a rope, so we want to avoid flattening it for as
2167 * long as possible.
2168 */
2169 if (text->isRope()) {
2170 if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
2171 return nullptr;
2172 } else {
2173 fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
2174 }
2175
2176 return &fm;
2177 }
2178
2179 /* If the pattern is not already a regular expression, make it so. */
normalizeRegExp(JSContext * cx,bool flat,unsigned optarg,const CallArgs & args)2180 bool normalizeRegExp(JSContext* cx, bool flat, unsigned optarg, const CallArgs& args)
2181 {
2182 if (re_.initialized())
2183 return true;
2184
2185 /* Build RegExp from pattern string. */
2186 RootedString opt(cx);
2187 if (optarg < args.length()) {
2188 if (JSScript* script = cx->currentScript()) {
2189 const char* filename = script->filename();
2190 cx->compartment()->addTelemetry(filename, JSCompartment::DeprecatedFlagsArgument);
2191 }
2192
2193 if (!cx->compartment()->warnedAboutFlagsArgument) {
2194 if (!JS_ReportErrorFlagsAndNumber(cx, JSREPORT_WARNING, GetErrorMessage, nullptr,
2195 JSMSG_DEPRECATED_FLAGS_ARG))
2196 return false;
2197 cx->compartment()->warnedAboutFlagsArgument = true;
2198 }
2199
2200 opt = ToString<CanGC>(cx, args[optarg]);
2201 if (!opt)
2202 return false;
2203 } else {
2204 opt = nullptr;
2205 }
2206
2207 Rooted<JSAtom*> pat(cx);
2208 if (flat) {
2209 pat = flattenPattern(cx, fm.pat_);
2210 if (!pat)
2211 return false;
2212 } else {
2213 pat = fm.pat_;
2214 }
2215 MOZ_ASSERT(pat);
2216
2217 return cx->compartment()->regExps.get(cx, pat, opt, &re_);
2218 }
2219
zeroLastIndex(JSContext * cx)2220 bool zeroLastIndex(JSContext* cx) {
2221 if (!regExpIsObject())
2222 return true;
2223
2224 // Use a fast path for same-global RegExp objects with writable
2225 // lastIndex.
2226 if (obj_->is<RegExpObject>()) {
2227 RegExpObject* nobj = &obj_->as<RegExpObject>();
2228 if (nobj->lookup(cx, cx->names().lastIndex)->writable()) {
2229 nobj->zeroLastIndex(cx);
2230 return true;
2231 }
2232 }
2233
2234 // Handle everything else generically (including throwing if .lastIndex is non-writable).
2235 RootedValue zero(cx, Int32Value(0));
2236 return SetProperty(cx, obj_, cx->names().lastIndex, zero);
2237 }
2238
regExp()2239 RegExpShared& regExp() { return *re_; }
2240
regExpIsObject()2241 bool regExpIsObject() { return obj_ != nullptr; }
regExpObject()2242 HandleObject regExpObject() {
2243 MOZ_ASSERT(regExpIsObject());
2244 return obj_;
2245 }
2246
2247 private:
2248 StringRegExpGuard(const StringRegExpGuard&) = delete;
2249 void operator=(const StringRegExpGuard&) = delete;
2250 };
2251
2252 } /* anonymous namespace */
2253
2254 static bool
DoMatchLocal(JSContext * cx,const CallArgs & args,RegExpStatics * res,HandleLinearString input,RegExpShared & re)2255 DoMatchLocal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
2256 RegExpShared& re)
2257 {
2258 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2259 RegExpRunStatus status = re.execute(cx, input, 0, &matches);
2260 if (status == RegExpRunStatus_Error)
2261 return false;
2262
2263 if (status == RegExpRunStatus_Success_NotFound) {
2264 args.rval().setNull();
2265 return true;
2266 }
2267
2268 if (!res->updateFromMatchPairs(cx, input, matches))
2269 return false;
2270
2271 RootedValue rval(cx);
2272 if (!CreateRegExpMatchResult(cx, input, matches, &rval))
2273 return false;
2274
2275 args.rval().set(rval);
2276 return true;
2277 }
2278
2279 /* ES5 15.5.4.10 step 8. */
2280 static bool
DoMatchGlobal(JSContext * cx,const CallArgs & args,RegExpStatics * res,HandleLinearString input,StringRegExpGuard & g)2281 DoMatchGlobal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
2282 StringRegExpGuard& g)
2283 {
2284 // Step 8a.
2285 //
2286 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2287 // rest of String.prototype.match, particularly in steps 8f(i) and
2288 // 8f(iii)(2)(a). Here's why.
2289 //
2290 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2291 // whose .global is true and a string. The only side effect of a call in
2292 // these circumstances is that the RegExp's .lastIndex will be modified to
2293 // the next starting index after the discovered match (or to 0 if there's
2294 // no remaining match). Because .lastIndex is a non-configurable data
2295 // property and no script-controllable code executes after step 8a, passing
2296 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
2297 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2298 // so the final value of .lastIndex is 0: exactly the state after step 8a
2299 // succeeds. No spec step lets script observe intermediate .lastIndex
2300 // values.
2301 //
2302 // The arrays returned by RegExp.prototype.exec always have a string at
2303 // index 0, for which [[Get]]s have no side effects.
2304 //
2305 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2306 //
2307 // This is a tricky point, because after this set, our implementation *can*
2308 // fail. The key is that script can't distinguish these failure modes from
2309 // one where, in spec terms, we fail immediately after step 8a. That *in
2310 // reality* we might have done extra matching work, or created a partial
2311 // results array to return, or hit an interrupt, is irrelevant. The
2312 // script can't tell we did any of those things but didn't update
2313 // .lastIndex. Thus we can optimize steps 8b onward however we want,
2314 // including eliminating intermediate .lastIndex sets, as long as we don't
2315 // add ways for script to observe the intermediate states.
2316 //
2317 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2318 // we can't get caught.
2319 if (!g.zeroLastIndex(cx))
2320 return false;
2321
2322 // Step 8b.
2323 AutoValueVector elements(cx);
2324
2325 size_t lastSuccessfulStart = 0;
2326
2327 // The loop variables from steps 8c-e aren't needed, as we use different
2328 // techniques from the spec to implement step 8f's loop.
2329
2330 // Step 8f.
2331 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2332 size_t charsLen = input->length();
2333 RegExpShared& re = g.regExp();
2334 for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
2335 if (!CheckForInterrupt(cx))
2336 return false;
2337
2338 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2339 RegExpRunStatus status = re.execute(cx, input, searchIndex, &matches);
2340 if (status == RegExpRunStatus_Error)
2341 return false;
2342
2343 // Step 8f(ii).
2344 if (status == RegExpRunStatus_Success_NotFound)
2345 break;
2346
2347 lastSuccessfulStart = searchIndex;
2348 MatchPair& match = matches[0];
2349
2350 // Steps 8f(iii)(1-3).
2351 searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2352
2353 // Step 8f(iii)(4-5).
2354 JSLinearString* str = NewDependentString(cx, input, match.start, match.length());
2355 if (!str)
2356 return false;
2357 if (!elements.append(StringValue(str)))
2358 return false;
2359 }
2360
2361 // Step 8g.
2362 if (elements.empty()) {
2363 args.rval().setNull();
2364 return true;
2365 }
2366
2367 // The last *successful* match updates the RegExpStatics. (Interestingly,
2368 // this implies that String.prototype.match's semantics aren't those
2369 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2370 res->updateLazily(cx, input, &re, lastSuccessfulStart);
2371
2372 // Steps 8b, 8f(iii)(5-6), 8h.
2373 JSObject* array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
2374 if (!array)
2375 return false;
2376
2377 args.rval().setObject(*array);
2378 return true;
2379 }
2380
2381 static bool
BuildFlatMatchArray(JSContext * cx,HandleString textstr,const FlatMatch & fm,CallArgs * args)2382 BuildFlatMatchArray(JSContext* cx, HandleString textstr, const FlatMatch& fm, CallArgs* args)
2383 {
2384 if (fm.match() < 0) {
2385 args->rval().setNull();
2386 return true;
2387 }
2388
2389 /* Get the templateObject that defines the shape and type of the output object */
2390 JSObject* templateObject = cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
2391 if (!templateObject)
2392 return false;
2393
2394 RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
2395 if (!arr)
2396 return false;
2397
2398 /* Store a Value for each pair. */
2399 arr->setDenseInitializedLength(1);
2400 arr->initDenseElement(0, StringValue(fm.pattern()));
2401
2402 /* Set the |index| property. (TemplateObject positions it in slot 0) */
2403 arr->setSlot(0, Int32Value(fm.match()));
2404
2405 /* Set the |input| property. (TemplateObject positions it in slot 1) */
2406 arr->setSlot(1, StringValue(textstr));
2407
2408 #ifdef DEBUG
2409 RootedValue test(cx);
2410 RootedId id(cx, NameToId(cx->names().index));
2411 if (!NativeGetProperty(cx, arr, id, &test))
2412 return false;
2413 MOZ_ASSERT(test == arr->getSlot(0));
2414 id = NameToId(cx->names().input);
2415 if (!NativeGetProperty(cx, arr, id, &test))
2416 return false;
2417 MOZ_ASSERT(test == arr->getSlot(1));
2418 #endif
2419
2420 args->rval().setObject(*arr);
2421 return true;
2422 }
2423
2424 /* ES5 15.5.4.10. */
2425 bool
str_match(JSContext * cx,unsigned argc,Value * vp)2426 js::str_match(JSContext* cx, unsigned argc, Value* vp)
2427 {
2428 CallArgs args = CallArgsFromVp(argc, vp);
2429
2430 /* Steps 1-2. */
2431 RootedString str(cx, ThisToStringForStringProto(cx, args));
2432 if (!str)
2433 return false;
2434
2435 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2436 StringRegExpGuard g(cx);
2437 if (!g.init(cx, args, true))
2438 return false;
2439
2440 /* Fast path when the search pattern can be searched for as a string. */
2441 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length()))
2442 return BuildFlatMatchArray(cx, str, *fm, &args);
2443
2444 /* Return if there was an error in tryFlatMatch. */
2445 if (cx->isExceptionPending())
2446 return false;
2447
2448 /* Create regular-expression internals as needed to perform the match. */
2449 if (!g.normalizeRegExp(cx, false, 1, args))
2450 return false;
2451
2452 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2453 if (!res)
2454 return false;
2455
2456 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2457 if (!linearStr)
2458 return false;
2459
2460 /* Steps 5-6, 7. */
2461 if (!g.regExp().global())
2462 return DoMatchLocal(cx, args, res, linearStr, g.regExp());
2463
2464 /* Steps 6, 8. */
2465 return DoMatchGlobal(cx, args, res, linearStr, g);
2466 }
2467
2468 bool
str_search(JSContext * cx,unsigned argc,Value * vp)2469 js::str_search(JSContext* cx, unsigned argc, Value* vp)
2470 {
2471 CallArgs args = CallArgsFromVp(argc, vp);
2472 RootedString str(cx, ThisToStringForStringProto(cx, args));
2473 if (!str)
2474 return false;
2475
2476 StringRegExpGuard g(cx);
2477 if (!g.init(cx, args, true))
2478 return false;
2479 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length())) {
2480 args.rval().setInt32(fm->match());
2481 return true;
2482 }
2483
2484 if (cx->isExceptionPending()) /* from tryFlatMatch */
2485 return false;
2486
2487 if (!g.normalizeRegExp(cx, false, 1, args))
2488 return false;
2489
2490 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2491 if (!linearStr)
2492 return false;
2493
2494 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2495 if (!res)
2496 return false;
2497
2498 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2499 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2500 RegExpRunStatus status = g.regExp().execute(cx, linearStr, 0, &matches);
2501 if (status == RegExpRunStatus_Error)
2502 return false;
2503
2504 if (status == RegExpRunStatus_Success)
2505 res->updateLazily(cx, linearStr, &g.regExp(), 0);
2506
2507 args.rval().setInt32(status == RegExpRunStatus_Success_NotFound ? -1 : matches[0].start);
2508 return true;
2509 }
2510
2511 // Utility for building a rope (lazy concatenation) of strings.
2512 class RopeBuilder {
2513 JSContext* cx;
2514 RootedString res;
2515
2516 RopeBuilder(const RopeBuilder& other) = delete;
2517 void operator=(const RopeBuilder& other) = delete;
2518
2519 public:
RopeBuilder(JSContext * cx)2520 explicit RopeBuilder(JSContext* cx)
2521 : cx(cx), res(cx, cx->runtime()->emptyString)
2522 {}
2523
append(HandleString str)2524 inline bool append(HandleString str) {
2525 res = ConcatStrings<CanGC>(cx, res, str);
2526 return !!res;
2527 }
2528
result()2529 inline JSString* result() {
2530 return res;
2531 }
2532 };
2533
2534 namespace {
2535
2536 template <typename CharT>
2537 static uint32_t
FindDollarIndex(const CharT * chars,size_t length)2538 FindDollarIndex(const CharT* chars, size_t length)
2539 {
2540 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2541 uint32_t dollarIndex = p - chars;
2542 MOZ_ASSERT(dollarIndex < length);
2543 return dollarIndex;
2544 }
2545 return UINT32_MAX;
2546 }
2547
2548 struct ReplaceData
2549 {
ReplaceData__anon6cc13cee0311::ReplaceData2550 explicit ReplaceData(JSContext* cx)
2551 : str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
2552 fig(cx, NullValue()), sb(cx)
2553 {}
2554
setReplacementString__anon6cc13cee0311::ReplaceData2555 inline void setReplacementString(JSLinearString* string) {
2556 MOZ_ASSERT(string);
2557 lambda = nullptr;
2558 elembase = nullptr;
2559 repstr = string;
2560
2561 AutoCheckCannotGC nogc;
2562 dollarIndex = string->hasLatin1Chars()
2563 ? FindDollarIndex(string->latin1Chars(nogc), string->length())
2564 : FindDollarIndex(string->twoByteChars(nogc), string->length());
2565 }
2566
setReplacementFunction__anon6cc13cee0311::ReplaceData2567 inline void setReplacementFunction(JSObject* func) {
2568 MOZ_ASSERT(func);
2569 lambda = func;
2570 elembase = nullptr;
2571 repstr = nullptr;
2572 dollarIndex = UINT32_MAX;
2573 }
2574
2575 RootedString str; /* 'this' parameter object as a string */
2576 StringRegExpGuard g; /* regexp parameter object and private data */
2577 RootedObject lambda; /* replacement function object or null */
2578 RootedNativeObject elembase; /* object for function(a){return b[a]} replace */
2579 RootedLinearString repstr; /* replacement string */
2580 uint32_t dollarIndex; /* index of first $ in repstr, or UINT32_MAX */
2581 int leftIndex; /* left context index in str->chars */
2582 bool calledBack; /* record whether callback has been called */
2583 FastInvokeGuard fig; /* used for lambda calls, also holds arguments */
2584 StringBuffer sb; /* buffer built during DoMatch */
2585 };
2586
2587 } /* anonymous namespace */
2588
2589 static bool
2590 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata);
2591
2592 static bool
DoMatchForReplaceLocal(JSContext * cx,RegExpStatics * res,HandleLinearString linearStr,RegExpShared & re,ReplaceData & rdata,size_t * rightContextOffset)2593 DoMatchForReplaceLocal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2594 RegExpShared& re, ReplaceData& rdata, size_t* rightContextOffset)
2595 {
2596 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2597 RegExpRunStatus status = re.execute(cx, linearStr, 0, &matches);
2598 if (status == RegExpRunStatus_Error)
2599 return false;
2600
2601 if (status == RegExpRunStatus_Success_NotFound)
2602 return true;
2603
2604 MatchPair& match = matches[0];
2605 *rightContextOffset = match.limit;
2606
2607 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2608 return false;
2609
2610 return ReplaceRegExp(cx, res, rdata);
2611 }
2612
2613 static bool
DoMatchForReplaceGlobal(JSContext * cx,RegExpStatics * res,HandleLinearString linearStr,RegExpShared & re,ReplaceData & rdata,size_t * rightContextOffset)2614 DoMatchForReplaceGlobal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2615 RegExpShared& re, ReplaceData& rdata, size_t* rightContextOffset)
2616 {
2617 size_t charsLen = linearStr->length();
2618 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2619 for (size_t count = 0, searchIndex = 0; searchIndex <= charsLen; ++count) {
2620 if (!CheckForInterrupt(cx))
2621 return false;
2622
2623 RegExpRunStatus status = re.execute(cx, linearStr, searchIndex, &matches);
2624 if (status == RegExpRunStatus_Error)
2625 return false;
2626
2627 if (status == RegExpRunStatus_Success_NotFound)
2628 break;
2629
2630 MatchPair& match = matches[0];
2631 searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2632 *rightContextOffset = match.limit;
2633
2634 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2635 return false;
2636
2637 if (!ReplaceRegExp(cx, res, rdata))
2638 return false;
2639 }
2640
2641 return true;
2642 }
2643
2644 template <typename CharT>
2645 static bool
InterpretDollar(RegExpStatics * res,const CharT * bp,const CharT * dp,const CharT * ep,ReplaceData & rdata,JSSubString * out,size_t * skip)2646 InterpretDollar(RegExpStatics* res, const CharT* bp, const CharT* dp, const CharT* ep,
2647 ReplaceData& rdata, JSSubString* out, size_t* skip)
2648 {
2649 MOZ_ASSERT(*dp == '$');
2650
2651 /* If there is only a dollar, bail now */
2652 if (dp + 1 >= ep)
2653 return false;
2654
2655 /* Interpret all Perl match-induced dollar variables. */
2656 char16_t dc = dp[1];
2657 if (JS7_ISDEC(dc)) {
2658 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2659 unsigned num = JS7_UNDEC(dc);
2660 if (num > res->getMatches().parenCount())
2661 return false;
2662
2663 const CharT* cp = dp + 2;
2664 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
2665 unsigned tmp = 10 * num + JS7_UNDEC(dc);
2666 if (tmp <= res->getMatches().parenCount()) {
2667 cp++;
2668 num = tmp;
2669 }
2670 }
2671 if (num == 0)
2672 return false;
2673
2674 *skip = cp - dp;
2675
2676 MOZ_ASSERT(num <= res->getMatches().parenCount());
2677
2678 /*
2679 * Note: we index to get the paren with the (1-indexed) pair
2680 * number, as opposed to a (0-indexed) paren number.
2681 */
2682 res->getParen(num, out);
2683 return true;
2684 }
2685
2686 *skip = 2;
2687 switch (dc) {
2688 case '$':
2689 out->init(rdata.repstr, dp - bp, 1);
2690 return true;
2691 case '&':
2692 res->getLastMatch(out);
2693 return true;
2694 case '+':
2695 res->getLastParen(out);
2696 return true;
2697 case '`':
2698 res->getLeftContext(out);
2699 return true;
2700 case '\'':
2701 res->getRightContext(out);
2702 return true;
2703 }
2704 return false;
2705 }
2706
2707 template <typename CharT>
2708 static bool
FindReplaceLengthString(JSContext * cx,RegExpStatics * res,ReplaceData & rdata,size_t * sizep)2709 FindReplaceLengthString(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2710 {
2711 JSLinearString* repstr = rdata.repstr;
2712 CheckedInt<uint32_t> replen = repstr->length();
2713
2714 if (rdata.dollarIndex != UINT32_MAX) {
2715 AutoCheckCannotGC nogc;
2716 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2717 const CharT* bp = repstr->chars<CharT>(nogc);
2718 const CharT* dp = bp + rdata.dollarIndex;
2719 const CharT* ep = bp + repstr->length();
2720 do {
2721 JSSubString sub;
2722 size_t skip;
2723 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2724 if (sub.length > skip)
2725 replen += sub.length - skip;
2726 else
2727 replen -= skip - sub.length;
2728 dp += skip;
2729 } else {
2730 dp++;
2731 }
2732
2733 dp = js_strchr_limit(dp, '$', ep);
2734 } while (dp);
2735 }
2736
2737 if (!replen.isValid()) {
2738 ReportAllocationOverflow(cx);
2739 return false;
2740 }
2741
2742 *sizep = replen.value();
2743 return true;
2744 }
2745
2746 static bool
FindReplaceLength(JSContext * cx,RegExpStatics * res,ReplaceData & rdata,size_t * sizep)2747 FindReplaceLength(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2748 {
2749 if (rdata.elembase) {
2750 /*
2751 * The base object is used when replace was passed a lambda which looks like
2752 * 'function(a) { return b[a]; }' for the base object b. b will not change
2753 * in the course of the replace unless we end up making a scripted call due
2754 * to accessing a scripted getter or a value with a scripted toString.
2755 */
2756 MOZ_ASSERT(rdata.lambda);
2757 MOZ_ASSERT(!rdata.elembase->getOps()->lookupProperty);
2758 MOZ_ASSERT(!rdata.elembase->getOps()->getProperty);
2759
2760 RootedValue match(cx);
2761 if (!res->createLastMatch(cx, &match))
2762 return false;
2763 JSAtom* atom = ToAtom<CanGC>(cx, match);
2764 if (!atom)
2765 return false;
2766
2767 RootedValue v(cx);
2768 if (HasDataProperty(cx, rdata.elembase, AtomToId(atom), v.address()) && v.isString()) {
2769 rdata.repstr = v.toString()->ensureLinear(cx);
2770 if (!rdata.repstr)
2771 return false;
2772 *sizep = rdata.repstr->length();
2773 return true;
2774 }
2775
2776 /*
2777 * Couldn't handle this property, fall through and despecialize to the
2778 * general lambda case.
2779 */
2780 rdata.elembase = nullptr;
2781 }
2782
2783 if (rdata.lambda) {
2784 RootedObject lambda(cx, rdata.lambda);
2785
2786 /*
2787 * In the lambda case, not only do we find the replacement string's
2788 * length, we compute repstr and return it via rdata for use within
2789 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2790 * index, input), i.e., all the properties of a regexp match array.
2791 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2792 * We grab up stack space to keep the newborn strings GC-rooted.
2793 */
2794 unsigned p = res->getMatches().parenCount();
2795 unsigned argc = 1 + p + 2;
2796
2797 InvokeArgs& args = rdata.fig.args();
2798 if (!args.init(cx, argc))
2799 return false;
2800
2801 args.setCallee(ObjectValue(*lambda));
2802 args.setThis(UndefinedValue());
2803
2804 /* Push $&, $1, $2, ... */
2805 unsigned argi = 0;
2806 if (!res->createLastMatch(cx, args[argi++]))
2807 return false;
2808
2809 for (size_t i = 0; i < res->getMatches().parenCount(); ++i) {
2810 if (!res->createParen(cx, i + 1, args[argi++]))
2811 return false;
2812 }
2813
2814 /* Push match index and input string. */
2815 args[argi++].setInt32(res->getMatches()[0].start);
2816 args[argi].setString(rdata.str);
2817
2818 if (!rdata.fig.invoke(cx))
2819 return false;
2820
2821 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2822 JSString* repstr = ToString<CanGC>(cx, args.rval());
2823 if (!repstr)
2824 return false;
2825 rdata.repstr = repstr->ensureLinear(cx);
2826 if (!rdata.repstr)
2827 return false;
2828 *sizep = rdata.repstr->length();
2829 return true;
2830 }
2831
2832 return rdata.repstr->hasLatin1Chars()
2833 ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
2834 : FindReplaceLengthString<char16_t>(cx, res, rdata, sizep);
2835 }
2836
2837 /*
2838 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2839 * derived from FindReplaceLength), and has been inflated to TwoByte if
2840 * necessary.
2841 */
2842 template <typename CharT>
2843 static void
DoReplace(RegExpStatics * res,ReplaceData & rdata)2844 DoReplace(RegExpStatics* res, ReplaceData& rdata)
2845 {
2846 AutoCheckCannotGC nogc;
2847 JSLinearString* repstr = rdata.repstr;
2848 const CharT* bp = repstr->chars<CharT>(nogc);
2849 const CharT* cp = bp;
2850
2851 if (rdata.dollarIndex != UINT32_MAX) {
2852 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2853 const CharT* dp = bp + rdata.dollarIndex;
2854 const CharT* ep = bp + repstr->length();
2855 do {
2856 /* Move one of the constant portions of the replacement value. */
2857 size_t len = dp - cp;
2858 rdata.sb.infallibleAppend(cp, len);
2859 cp = dp;
2860
2861 JSSubString sub;
2862 size_t skip;
2863 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2864 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
2865 cp += skip;
2866 dp += skip;
2867 } else {
2868 dp++;
2869 }
2870
2871 dp = js_strchr_limit(dp, '$', ep);
2872 } while (dp);
2873 }
2874 rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
2875 }
2876
2877 static bool
ReplaceRegExp(JSContext * cx,RegExpStatics * res,ReplaceData & rdata)2878 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata)
2879 {
2880
2881 const MatchPair& match = res->getMatches()[0];
2882 MOZ_ASSERT(!match.isUndefined());
2883 MOZ_ASSERT(match.limit >= match.start && match.limit >= 0);
2884
2885 rdata.calledBack = true;
2886 size_t leftoff = rdata.leftIndex;
2887 size_t leftlen = match.start - leftoff;
2888 rdata.leftIndex = match.limit;
2889
2890 size_t replen = 0; /* silence 'unused' warning */
2891 if (!FindReplaceLength(cx, res, rdata, &replen))
2892 return false;
2893
2894 CheckedInt<uint32_t> newlen(rdata.sb.length());
2895 newlen += leftlen;
2896 newlen += replen;
2897 if (!newlen.isValid()) {
2898 ReportAllocationOverflow(cx);
2899 return false;
2900 }
2901
2902 /*
2903 * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2904 * inflation later on.
2905 */
2906 JSLinearString& str = rdata.str->asLinear(); /* flattened for regexp */
2907 if (str.hasTwoByteChars() || rdata.repstr->hasTwoByteChars()) {
2908 if (!rdata.sb.ensureTwoByteChars())
2909 return false;
2910 }
2911
2912 if (!rdata.sb.reserve(newlen.value()))
2913 return false;
2914
2915 /* Append skipped-over portion of the search value. */
2916 rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
2917
2918 if (rdata.repstr->hasLatin1Chars())
2919 DoReplace<Latin1Char>(res, rdata);
2920 else
2921 DoReplace<char16_t>(res, rdata);
2922 return true;
2923 }
2924
2925 static JSString*
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleString repstr,const FlatMatch & fm)2926 BuildFlatReplacement(JSContext* cx, HandleString textstr, HandleString repstr,
2927 const FlatMatch& fm)
2928 {
2929 RopeBuilder builder(cx);
2930 size_t match = fm.match();
2931 size_t matchEnd = match + fm.patternLength();
2932
2933 if (textstr->isRope()) {
2934 /*
2935 * If we are replacing over a rope, avoid flattening it by iterating
2936 * through it, building a new rope.
2937 */
2938 StringSegmentRange r(cx);
2939 if (!r.init(textstr))
2940 return nullptr;
2941
2942 size_t pos = 0;
2943 while (!r.empty()) {
2944 RootedString str(cx, r.front());
2945 size_t len = str->length();
2946 size_t strEnd = pos + len;
2947 if (pos < matchEnd && strEnd > match) {
2948 /*
2949 * We need to special-case any part of the rope that overlaps
2950 * with the replacement string.
2951 */
2952 if (match >= pos) {
2953 /*
2954 * If this part of the rope overlaps with the left side of
2955 * the pattern, then it must be the only one to overlap with
2956 * the first character in the pattern, so we include the
2957 * replacement string here.
2958 */
2959 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2960 if (!leftSide ||
2961 !builder.append(leftSide) ||
2962 !builder.append(repstr))
2963 {
2964 return nullptr;
2965 }
2966 }
2967
2968 /*
2969 * If str runs off the end of the matched string, append the
2970 * last part of str.
2971 */
2972 if (strEnd > matchEnd) {
2973 RootedString rightSide(cx, NewDependentString(cx, str, matchEnd - pos,
2974 strEnd - matchEnd));
2975 if (!rightSide || !builder.append(rightSide))
2976 return nullptr;
2977 }
2978 } else {
2979 if (!builder.append(str))
2980 return nullptr;
2981 }
2982 pos += str->length();
2983 if (!r.popFront())
2984 return nullptr;
2985 }
2986 } else {
2987 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, match));
2988 if (!leftSide)
2989 return nullptr;
2990 RootedString rightSide(cx);
2991 rightSide = NewDependentString(cx, textstr, match + fm.patternLength(),
2992 textstr->length() - match - fm.patternLength());
2993 if (!rightSide ||
2994 !builder.append(leftSide) ||
2995 !builder.append(repstr) ||
2996 !builder.append(rightSide))
2997 {
2998 return nullptr;
2999 }
3000 }
3001
3002 return builder.result();
3003 }
3004
3005 template <typename CharT>
3006 static bool
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,const FlatMatch & fm,JSLinearString * text,const CharT * repChars,size_t repLength)3007 AppendDollarReplacement(StringBuffer& newReplaceChars, size_t firstDollarIndex,
3008 const FlatMatch& fm, JSLinearString* text,
3009 const CharT* repChars, size_t repLength)
3010 {
3011 MOZ_ASSERT(firstDollarIndex < repLength);
3012
3013 size_t matchStart = fm.match();
3014 size_t matchLimit = matchStart + fm.patternLength();
3015
3016 /* Move the pre-dollar chunk in bulk. */
3017 newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
3018
3019 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
3020 const CharT* repLimit = repChars + repLength;
3021 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
3022 if (*it != '$' || it == repLimit - 1) {
3023 if (!newReplaceChars.append(*it))
3024 return false;
3025 continue;
3026 }
3027
3028 switch (*(it + 1)) {
3029 case '$': /* Eat one of the dollars. */
3030 if (!newReplaceChars.append(*it))
3031 return false;
3032 break;
3033 case '&':
3034 if (!newReplaceChars.appendSubstring(text, matchStart, matchLimit - matchStart))
3035 return false;
3036 break;
3037 case '`':
3038 if (!newReplaceChars.appendSubstring(text, 0, matchStart))
3039 return false;
3040 break;
3041 case '\'':
3042 if (!newReplaceChars.appendSubstring(text, matchLimit, text->length() - matchLimit))
3043 return false;
3044 break;
3045 default: /* The dollar we saw was not special (no matter what its mother told it). */
3046 if (!newReplaceChars.append(*it))
3047 return false;
3048 continue;
3049 }
3050 ++it; /* We always eat an extra char in the above switch. */
3051 }
3052
3053 return true;
3054 }
3055
3056 /*
3057 * Perform a linear-scan dollar substitution on the replacement text,
3058 * constructing a result string that looks like:
3059 *
3060 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
3061 */
3062 static JSString*
BuildDollarReplacement(JSContext * cx,JSString * textstrArg,JSLinearString * repstr,uint32_t firstDollarIndex,const FlatMatch & fm)3063 BuildDollarReplacement(JSContext* cx, JSString* textstrArg, JSLinearString* repstr,
3064 uint32_t firstDollarIndex, const FlatMatch& fm)
3065 {
3066 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
3067 if (!textstr)
3068 return nullptr;
3069
3070 size_t matchStart = fm.match();
3071 size_t matchLimit = matchStart + fm.patternLength();
3072
3073 /*
3074 * Most probably:
3075 *
3076 * len(newstr) >= len(orig) - len(match) + len(replacement)
3077 *
3078 * Note that dollar vars _could_ make the resulting text smaller than this.
3079 */
3080 StringBuffer newReplaceChars(cx);
3081 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
3082 return nullptr;
3083
3084 if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
3085 return nullptr;
3086
3087 bool res;
3088 if (repstr->hasLatin1Chars()) {
3089 AutoCheckCannotGC nogc;
3090 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3091 repstr->latin1Chars(nogc), repstr->length());
3092 } else {
3093 AutoCheckCannotGC nogc;
3094 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3095 repstr->twoByteChars(nogc), repstr->length());
3096 }
3097 if (!res)
3098 return nullptr;
3099
3100 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, matchStart));
3101 if (!leftSide)
3102 return nullptr;
3103
3104 RootedString newReplace(cx, newReplaceChars.finishString());
3105 if (!newReplace)
3106 return nullptr;
3107
3108 MOZ_ASSERT(textstr->length() >= matchLimit);
3109 RootedString rightSide(cx, NewDependentString(cx, textstr, matchLimit,
3110 textstr->length() - matchLimit));
3111 if (!rightSide)
3112 return nullptr;
3113
3114 RopeBuilder builder(cx);
3115 if (!builder.append(leftSide) || !builder.append(newReplace) || !builder.append(rightSide))
3116 return nullptr;
3117
3118 return builder.result();
3119 }
3120
3121 struct StringRange
3122 {
3123 size_t start;
3124 size_t length;
3125
StringRangeStringRange3126 StringRange(size_t s, size_t l)
3127 : start(s), length(l)
3128 { }
3129 };
3130
3131 template <typename CharT>
3132 static void
CopySubstringsToFatInline(JSFatInlineString * dest,const CharT * src,const StringRange * ranges,size_t rangesLen,size_t outputLen)3133 CopySubstringsToFatInline(JSFatInlineString* dest, const CharT* src, const StringRange* ranges,
3134 size_t rangesLen, size_t outputLen)
3135 {
3136 CharT* buf = dest->init<CharT>(outputLen);
3137 size_t pos = 0;
3138 for (size_t i = 0; i < rangesLen; i++) {
3139 PodCopy(buf + pos, src + ranges[i].start, ranges[i].length);
3140 pos += ranges[i].length;
3141 }
3142
3143 MOZ_ASSERT(pos == outputLen);
3144 buf[outputLen] = 0;
3145 }
3146
3147 static inline JSFatInlineString*
FlattenSubstrings(JSContext * cx,HandleLinearString str,const StringRange * ranges,size_t rangesLen,size_t outputLen)3148 FlattenSubstrings(JSContext* cx, HandleLinearString str, const StringRange* ranges,
3149 size_t rangesLen, size_t outputLen)
3150 {
3151 JSFatInlineString* result = Allocate<JSFatInlineString>(cx);
3152 if (!result)
3153 return nullptr;
3154
3155 AutoCheckCannotGC nogc;
3156 if (str->hasLatin1Chars())
3157 CopySubstringsToFatInline(result, str->latin1Chars(nogc), ranges, rangesLen, outputLen);
3158 else
3159 CopySubstringsToFatInline(result, str->twoByteChars(nogc), ranges, rangesLen, outputLen);
3160 return result;
3161 }
3162
3163 static JSString*
AppendSubstrings(JSContext * cx,HandleLinearString str,const StringRange * ranges,size_t rangesLen)3164 AppendSubstrings(JSContext* cx, HandleLinearString str, const StringRange* ranges,
3165 size_t rangesLen)
3166 {
3167 MOZ_ASSERT(rangesLen);
3168
3169 /* For single substrings, construct a dependent string. */
3170 if (rangesLen == 1)
3171 return NewDependentString(cx, str, ranges[0].start, ranges[0].length);
3172
3173 bool isLatin1 = str->hasLatin1Chars();
3174 uint32_t fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
3175 if (isLatin1)
3176 fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_LATIN1;
3177
3178 /* Collect substrings into a rope */
3179 size_t i = 0;
3180 RopeBuilder rope(cx);
3181 RootedString part(cx, nullptr);
3182 while (i < rangesLen) {
3183
3184 /* Find maximum range that fits in JSFatInlineString */
3185 size_t substrLen = 0;
3186 size_t end = i;
3187 for (; end < rangesLen; end++) {
3188 if (substrLen + ranges[end].length > fatInlineMaxLength)
3189 break;
3190 substrLen += ranges[end].length;
3191 }
3192
3193 if (i == end) {
3194 /* Not even one range fits JSFatInlineString, use DependentString */
3195 const StringRange& sr = ranges[i++];
3196 part = NewDependentString(cx, str, sr.start, sr.length);
3197 } else {
3198 /* Copy the ranges (linearly) into a JSFatInlineString */
3199 part = FlattenSubstrings(cx, str, ranges + i, end - i, substrLen);
3200 i = end;
3201 }
3202
3203 if (!part)
3204 return nullptr;
3205
3206 /* Appending to the rope permanently roots the substring. */
3207 if (!rope.append(part))
3208 return nullptr;
3209 }
3210
3211 return rope.result();
3212 }
3213
3214 static JSString*
StrReplaceRegexpRemove(JSContext * cx,HandleString str,RegExpShared & re)3215 StrReplaceRegexpRemove(JSContext* cx, HandleString str, RegExpShared& re)
3216 {
3217 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3218 if (!linearStr)
3219 return nullptr;
3220
3221 Vector<StringRange, 16, SystemAllocPolicy> ranges;
3222
3223 size_t charsLen = linearStr->length();
3224
3225 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3226 size_t startIndex = 0; /* Index used for iterating through the string. */
3227 size_t lastIndex = 0; /* Index after last successful match. */
3228 size_t lazyIndex = 0; /* Index before last successful match. */
3229
3230 /* Accumulate StringRanges for unmatched substrings. */
3231 while (startIndex <= charsLen) {
3232 if (!CheckForInterrupt(cx))
3233 return nullptr;
3234
3235 RegExpRunStatus status = re.execute(cx, linearStr, startIndex, &matches);
3236 if (status == RegExpRunStatus_Error)
3237 return nullptr;
3238 if (status == RegExpRunStatus_Success_NotFound)
3239 break;
3240 MatchPair& match = matches[0];
3241
3242 /* Include the latest unmatched substring. */
3243 if (size_t(match.start) > lastIndex) {
3244 if (!ranges.append(StringRange(lastIndex, match.start - lastIndex)))
3245 return nullptr;
3246 }
3247
3248 lazyIndex = lastIndex;
3249 lastIndex = match.limit;
3250
3251 startIndex = match.isEmpty() ? match.limit + 1 : match.limit;
3252
3253 /* Non-global removal executes at most once. */
3254 if (!re.global())
3255 break;
3256 }
3257
3258 RegExpStatics* res;
3259
3260 /* If unmatched, return the input string. */
3261 if (!lastIndex) {
3262 if (startIndex > 0) {
3263 res = cx->global()->getRegExpStatics(cx);
3264 if (!res)
3265 return nullptr;
3266 res->updateLazily(cx, linearStr, &re, lazyIndex);
3267 }
3268
3269 return str;
3270 }
3271
3272 /* The last successful match updates the RegExpStatics. */
3273 res = cx->global()->getRegExpStatics(cx);
3274 if (!res)
3275 return nullptr;
3276
3277 res->updateLazily(cx, linearStr, &re, lazyIndex);
3278
3279 /* Include any remaining part of the string. */
3280 if (lastIndex < charsLen) {
3281 if (!ranges.append(StringRange(lastIndex, charsLen - lastIndex)))
3282 return nullptr;
3283 }
3284
3285 /* Handle the empty string before calling .begin(). */
3286 if (ranges.empty())
3287 return cx->runtime()->emptyString;
3288
3289 return AppendSubstrings(cx, linearStr, ranges.begin(), ranges.length());
3290 }
3291
3292 static inline JSString*
StrReplaceRegExp(JSContext * cx,ReplaceData & rdata)3293 StrReplaceRegExp(JSContext* cx, ReplaceData& rdata)
3294 {
3295 rdata.leftIndex = 0;
3296 rdata.calledBack = false;
3297
3298 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3299 if (!res)
3300 return nullptr;
3301
3302 RegExpShared& re = rdata.g.regExp();
3303
3304 // The spec doesn't describe this function very clearly, so we go ahead and
3305 // assume that when the input to String.prototype.replace is a global
3306 // RegExp, calling the replacer function (assuming one was provided) takes
3307 // place only after the matching is done. See the comment at the beginning
3308 // of DoMatchGlobal explaining why we can zero the the RegExp object's
3309 // lastIndex property here.
3310 if (re.global() && !rdata.g.zeroLastIndex(cx))
3311 return nullptr;
3312
3313 /* Optimize removal. */
3314 if (rdata.repstr && rdata.repstr->length() == 0) {
3315 MOZ_ASSERT(!rdata.lambda && !rdata.elembase && rdata.dollarIndex == UINT32_MAX);
3316 return StrReplaceRegexpRemove(cx, rdata.str, re);
3317 }
3318
3319 RootedLinearString linearStr(cx, rdata.str->ensureLinear(cx));
3320 if (!linearStr)
3321 return nullptr;
3322
3323 size_t rightContextOffset = 0;
3324 if (re.global()) {
3325 if (!DoMatchForReplaceGlobal(cx, res, linearStr, re, rdata, &rightContextOffset))
3326 return nullptr;
3327 } else {
3328 if (!DoMatchForReplaceLocal(cx, res, linearStr, re, rdata, &rightContextOffset))
3329 return nullptr;
3330 }
3331
3332 if (!rdata.calledBack) {
3333 /* Didn't match, so the string is unmodified. */
3334 return rdata.str;
3335 }
3336
3337 MOZ_ASSERT(rightContextOffset <= rdata.str->length());
3338 size_t length = rdata.str->length() - rightContextOffset;
3339 if (!rdata.sb.appendSubstring(rdata.str, rightContextOffset, length))
3340 return nullptr;
3341
3342 return rdata.sb.finishString();
3343 }
3344
3345 static inline bool
str_replace_regexp(JSContext * cx,const CallArgs & args,ReplaceData & rdata)3346 str_replace_regexp(JSContext* cx, const CallArgs& args, ReplaceData& rdata)
3347 {
3348 if (!rdata.g.normalizeRegExp(cx, true, 2, args))
3349 return false;
3350
3351 JSString* res = StrReplaceRegExp(cx, rdata);
3352 if (!res)
3353 return false;
3354
3355 args.rval().setString(res);
3356 return true;
3357 }
3358
3359 JSString*
str_replace_regexp_raw(JSContext * cx,HandleString string,Handle<RegExpObject * > regexp,HandleString replacement)3360 js::str_replace_regexp_raw(JSContext* cx, HandleString string, Handle<RegExpObject*> regexp,
3361 HandleString replacement)
3362 {
3363 /* Optimize removal, so we don't have to create ReplaceData */
3364 if (replacement->length() == 0) {
3365 StringRegExpGuard guard(cx);
3366 if (!guard.initRegExp(cx, regexp))
3367 return nullptr;
3368
3369 RegExpShared& re = guard.regExp();
3370 return StrReplaceRegexpRemove(cx, string, re);
3371 }
3372
3373 ReplaceData rdata(cx);
3374 rdata.str = string;
3375
3376 JSLinearString* repl = replacement->ensureLinear(cx);
3377 if (!repl)
3378 return nullptr;
3379
3380 rdata.setReplacementString(repl);
3381
3382 if (!rdata.g.initRegExp(cx, regexp))
3383 return nullptr;
3384
3385 return StrReplaceRegExp(cx, rdata);
3386 }
3387
3388 static JSString*
StrReplaceString(JSContext * cx,ReplaceData & rdata,const FlatMatch & fm)3389 StrReplaceString(JSContext* cx, ReplaceData& rdata, const FlatMatch& fm)
3390 {
3391 /*
3392 * Note: we could optimize the text.length == pattern.length case if we wanted,
3393 * even in the presence of dollar metachars.
3394 */
3395 if (rdata.dollarIndex != UINT32_MAX)
3396 return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollarIndex, fm);
3397 return BuildFlatReplacement(cx, rdata.str, rdata.repstr, fm);
3398 }
3399
3400 static const uint32_t ReplaceOptArg = 2;
3401
3402 JSString*
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)3403 js::str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
3404 HandleString replacement)
3405 {
3406 ReplaceData rdata(cx);
3407
3408 rdata.str = string;
3409 JSLinearString* repl = replacement->ensureLinear(cx);
3410 if (!repl)
3411 return nullptr;
3412 rdata.setReplacementString(repl);
3413
3414 if (!rdata.g.init(cx, pattern))
3415 return nullptr;
3416 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, ReplaceOptArg, false);
3417
3418 if (fm->match() < 0)
3419 return string;
3420
3421 return StrReplaceString(cx, rdata, *fm);
3422 }
3423
3424 static inline bool
str_replace_flat_lambda(JSContext * cx,const CallArgs & outerArgs,ReplaceData & rdata,const FlatMatch & fm)3425 str_replace_flat_lambda(JSContext* cx, const CallArgs& outerArgs, ReplaceData& rdata,
3426 const FlatMatch& fm)
3427 {
3428 RootedString matchStr(cx, NewDependentString(cx, rdata.str, fm.match(), fm.patternLength()));
3429 if (!matchStr)
3430 return false;
3431
3432 /* lambda(matchStr, matchStart, textstr) */
3433 static const uint32_t lambdaArgc = 3;
3434 if (!rdata.fig.args().init(cx, lambdaArgc))
3435 return false;
3436
3437 CallArgs& args = rdata.fig.args();
3438 args.setCallee(ObjectValue(*rdata.lambda));
3439 args.setThis(UndefinedValue());
3440
3441 Value* sp = args.array();
3442 sp[0].setString(matchStr);
3443 sp[1].setInt32(fm.match());
3444 sp[2].setString(rdata.str);
3445
3446 if (!rdata.fig.invoke(cx))
3447 return false;
3448
3449 RootedString repstr(cx, ToString<CanGC>(cx, args.rval()));
3450 if (!repstr)
3451 return false;
3452
3453 RootedString leftSide(cx, NewDependentString(cx, rdata.str, 0, fm.match()));
3454 if (!leftSide)
3455 return false;
3456
3457 size_t matchLimit = fm.match() + fm.patternLength();
3458 RootedString rightSide(cx, NewDependentString(cx, rdata.str, matchLimit,
3459 rdata.str->length() - matchLimit));
3460 if (!rightSide)
3461 return false;
3462
3463 RopeBuilder builder(cx);
3464 if (!(builder.append(leftSide) &&
3465 builder.append(repstr) &&
3466 builder.append(rightSide))) {
3467 return false;
3468 }
3469
3470 outerArgs.rval().setString(builder.result());
3471 return true;
3472 }
3473
3474 /*
3475 * Pattern match the script to check if it is is indexing into a particular
3476 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3477 * such cases, which are used by javascript packers (particularly the popular
3478 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3479 * code patterns generated by such packers here.
3480 */
3481 static bool
LambdaIsGetElem(JSContext * cx,JSObject & lambda,MutableHandleNativeObject pobj)3482 LambdaIsGetElem(JSContext* cx, JSObject& lambda, MutableHandleNativeObject pobj)
3483 {
3484 if (!lambda.is<JSFunction>())
3485 return true;
3486
3487 RootedFunction fun(cx, &lambda.as<JSFunction>());
3488 if (!fun->isInterpreted() || fun->isClassConstructor())
3489 return true;
3490
3491 JSScript* script = fun->getOrCreateScript(cx);
3492 if (!script)
3493 return false;
3494
3495 jsbytecode* pc = script->code();
3496
3497 /*
3498 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3499 * Rule out the (unlikely) possibility of a function with a call object
3500 * since it would make our scope walk off by 1.
3501 */
3502 if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->needsCallObject())
3503 return true;
3504 ScopeCoordinate sc(pc);
3505 ScopeObject* scope = &fun->environment()->as<ScopeObject>();
3506 for (unsigned i = 0; i < sc.hops(); ++i)
3507 scope = &scope->enclosingScope().as<ScopeObject>();
3508 Value b = scope->aliasedVar(sc);
3509 pc += JSOP_GETALIASEDVAR_LENGTH;
3510
3511 /* Look for 'a' to be the lambda's first argument. */
3512 if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0)
3513 return true;
3514 pc += JSOP_GETARG_LENGTH;
3515
3516 /* 'b[a]' */
3517 if (JSOp(*pc) != JSOP_GETELEM)
3518 return true;
3519 pc += JSOP_GETELEM_LENGTH;
3520
3521 /* 'return b[a]' */
3522 if (JSOp(*pc) != JSOP_RETURN)
3523 return true;
3524
3525 /* 'b' must behave like a normal object. */
3526 if (!b.isObject())
3527 return true;
3528
3529 JSObject& bobj = b.toObject();
3530 const Class* clasp = bobj.getClass();
3531 if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty)
3532 return true;
3533
3534 pobj.set(&bobj.as<NativeObject>());
3535 return true;
3536 }
3537
3538 bool
str_replace(JSContext * cx,unsigned argc,Value * vp)3539 js::str_replace(JSContext* cx, unsigned argc, Value* vp)
3540 {
3541 CallArgs args = CallArgsFromVp(argc, vp);
3542
3543 ReplaceData rdata(cx);
3544 rdata.str = ThisToStringForStringProto(cx, args);
3545 if (!rdata.str)
3546 return false;
3547
3548 if (!rdata.g.init(cx, args))
3549 return false;
3550
3551 /* Extract replacement string/function. */
3552 if (args.length() >= ReplaceOptArg && IsCallable(args[1])) {
3553 rdata.setReplacementFunction(&args[1].toObject());
3554
3555 if (!LambdaIsGetElem(cx, *rdata.lambda, &rdata.elembase))
3556 return false;
3557 } else {
3558 JSLinearString* string = ArgToRootedString(cx, args, 1);
3559 if (!string)
3560 return false;
3561
3562 rdata.setReplacementString(string);
3563 }
3564
3565 rdata.fig.initFunction(ObjectOrNullValue(rdata.lambda));
3566
3567 /*
3568 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3569 * its input to a regular expression. (Even if it contains metachars.)
3570 *
3571 * However, if the user invokes our (non-standard) |flags| argument
3572 * extension then we revert to creating a regular expression. Note that
3573 * this is observable behavior through the side-effect mutation of the
3574 * |RegExp| statics.
3575 */
3576
3577 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
3578
3579 if (!fm) {
3580 if (cx->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3581 return false;
3582 return str_replace_regexp(cx, args, rdata);
3583 }
3584
3585 if (fm->match() < 0) {
3586 args.rval().setString(rdata.str);
3587 return true;
3588 }
3589
3590 if (rdata.lambda)
3591 return str_replace_flat_lambda(cx, args, rdata, *fm);
3592
3593 JSString* res = StrReplaceString(cx, rdata, *fm);
3594 if (!res)
3595 return false;
3596
3597 args.rval().setString(res);
3598 return true;
3599 }
3600
3601 namespace {
3602
3603 class SplitMatchResult {
3604 size_t endIndex_;
3605 size_t length_;
3606
3607 public:
setFailure()3608 void setFailure() {
3609 JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
3610 endIndex_ = SIZE_MAX;
3611 }
isFailure() const3612 bool isFailure() const {
3613 return endIndex_ == SIZE_MAX;
3614 }
endIndex() const3615 size_t endIndex() const {
3616 MOZ_ASSERT(!isFailure());
3617 return endIndex_;
3618 }
length() const3619 size_t length() const {
3620 MOZ_ASSERT(!isFailure());
3621 return length_;
3622 }
setResult(size_t length,size_t endIndex)3623 void setResult(size_t length, size_t endIndex) {
3624 length_ = length;
3625 endIndex_ = endIndex;
3626 }
3627 };
3628
3629 } /* anonymous namespace */
3630
3631 template<class Matcher>
3632 static JSObject*
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,const Matcher & splitMatch,HandleObjectGroup group)3633 SplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, const Matcher& splitMatch,
3634 HandleObjectGroup group)
3635 {
3636 size_t strLength = str->length();
3637 SplitMatchResult result;
3638
3639 /* Step 11. */
3640 if (strLength == 0) {
3641 if (!splitMatch(cx, str, 0, &result))
3642 return nullptr;
3643
3644 /*
3645 * NB: Unlike in the non-empty string case, it's perfectly fine
3646 * (indeed the spec requires it) if we match at the end of the
3647 * string. Thus these cases should hold:
3648 *
3649 * var a = "".split("");
3650 * assertEq(a.length, 0);
3651 * var b = "".split(/.?/);
3652 * assertEq(b.length, 0);
3653 */
3654 if (!result.isFailure())
3655 return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3656
3657 RootedValue v(cx, StringValue(str));
3658 return NewCopiedArrayTryUseGroup(cx, group, v.address(), 1);
3659 }
3660
3661 /* Step 12. */
3662 size_t lastEndIndex = 0;
3663 size_t index = 0;
3664
3665 /* Step 13. */
3666 AutoValueVector splits(cx);
3667
3668 while (index < strLength) {
3669 /* Step 13(a). */
3670 if (!splitMatch(cx, str, index, &result))
3671 return nullptr;
3672
3673 /*
3674 * Step 13(b).
3675 *
3676 * Our match algorithm differs from the spec in that it returns the
3677 * next index at which a match happens. If no match happens we're
3678 * done.
3679 *
3680 * But what if the match is at the end of the string (and the string is
3681 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3682 * specially exclude it. Thus this case should hold:
3683 *
3684 * var a = "abc".split(/\b/);
3685 * assertEq(a.length, 1);
3686 * assertEq(a[0], "abc");
3687 */
3688 if (result.isFailure())
3689 break;
3690
3691 /* Step 13(c)(i). */
3692 size_t sepLength = result.length();
3693 size_t endIndex = result.endIndex();
3694 if (sepLength == 0 && endIndex == strLength)
3695 break;
3696
3697 /* Step 13(c)(ii). */
3698 if (endIndex == lastEndIndex) {
3699 index++;
3700 continue;
3701 }
3702
3703 /* Step 13(c)(iii). */
3704 MOZ_ASSERT(lastEndIndex < endIndex);
3705 MOZ_ASSERT(sepLength <= strLength);
3706 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3707
3708 /* Steps 13(c)(iii)(1-3). */
3709 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3710 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3711 if (!sub || !splits.append(StringValue(sub)))
3712 return nullptr;
3713
3714 /* Step 13(c)(iii)(4). */
3715 if (splits.length() == limit)
3716 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3717
3718 /* Step 13(c)(iii)(5). */
3719 lastEndIndex = endIndex;
3720
3721 /* Step 13(c)(iii)(6-7). */
3722 if (Matcher::returnsCaptures) {
3723 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3724 if (!res)
3725 return nullptr;
3726
3727 const MatchPairs& matches = res->getMatches();
3728 for (size_t i = 0; i < matches.parenCount(); i++) {
3729 /* Steps 13(c)(iii)(7)(a-c). */
3730 if (!matches[i + 1].isUndefined()) {
3731 JSSubString parsub;
3732 res->getParen(i + 1, &parsub);
3733 sub = NewDependentString(cx, parsub.base, parsub.offset, parsub.length);
3734 if (!sub || !splits.append(StringValue(sub)))
3735 return nullptr;
3736 } else {
3737 if (!splits.append(UndefinedValue()))
3738 return nullptr;
3739 }
3740
3741 /* Step 13(c)(iii)(7)(d). */
3742 if (splits.length() == limit)
3743 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3744 }
3745 }
3746
3747 /* Step 13(c)(iii)(8). */
3748 index = lastEndIndex;
3749 }
3750
3751 /* Steps 14-15. */
3752 JSString* sub = NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3753 if (!sub || !splits.append(StringValue(sub)))
3754 return nullptr;
3755
3756 /* Step 16. */
3757 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3758 }
3759
3760 // Fast-path for splitting a string into a character array via split("").
3761 static JSObject*
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleObjectGroup group)3762 CharSplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, HandleObjectGroup group)
3763 {
3764 size_t strLength = str->length();
3765 if (strLength == 0)
3766 return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3767
3768 js::StaticStrings& staticStrings = cx->staticStrings();
3769 uint32_t resultlen = (limit < strLength ? limit : strLength);
3770
3771 AutoValueVector splits(cx);
3772 if (!splits.reserve(resultlen))
3773 return nullptr;
3774
3775 for (size_t i = 0; i < resultlen; ++i) {
3776 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3777 if (!sub)
3778 return nullptr;
3779 splits.infallibleAppend(StringValue(sub));
3780 }
3781
3782 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
3783 }
3784
3785 namespace {
3786
3787 /*
3788 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3789 * paths for regular expression and string separators.
3790 *
3791 * The algorithm differs from the spec in that the we return the next index at
3792 * which a match happens.
3793 */
3794 class SplitRegExpMatcher
3795 {
3796 RegExpShared& re;
3797 RegExpStatics* res;
3798
3799 public:
SplitRegExpMatcher(RegExpShared & re,RegExpStatics * res)3800 SplitRegExpMatcher(RegExpShared& re, RegExpStatics* res) : re(re), res(res) {}
3801
3802 static const bool returnsCaptures = true;
3803
operator ()(JSContext * cx,HandleLinearString str,size_t index,SplitMatchResult * result) const3804 bool operator()(JSContext* cx, HandleLinearString str, size_t index,
3805 SplitMatchResult* result) const
3806 {
3807 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3808 RegExpRunStatus status = re.execute(cx, str, index, &matches);
3809 if (status == RegExpRunStatus_Error)
3810 return false;
3811
3812 if (status == RegExpRunStatus_Success_NotFound) {
3813 result->setFailure();
3814 return true;
3815 }
3816
3817 if (!res->updateFromMatchPairs(cx, str, matches))
3818 return false;
3819
3820 JSSubString sep;
3821 res->getLastMatch(&sep);
3822
3823 result->setResult(sep.length, matches[0].limit);
3824 return true;
3825 }
3826 };
3827
3828 class SplitStringMatcher
3829 {
3830 RootedLinearString sep;
3831
3832 public:
SplitStringMatcher(JSContext * cx,HandleLinearString sep)3833 SplitStringMatcher(JSContext* cx, HandleLinearString sep)
3834 : sep(cx, sep)
3835 {}
3836
3837 static const bool returnsCaptures = false;
3838
operator ()(JSContext * cx,JSLinearString * str,size_t index,SplitMatchResult * res) const3839 bool operator()(JSContext* cx, JSLinearString* str, size_t index, SplitMatchResult* res) const
3840 {
3841 MOZ_ASSERT(index == 0 || index < str->length());
3842 int match = StringMatch(str, sep, index);
3843 if (match == -1)
3844 res->setFailure();
3845 else
3846 res->setResult(sep->length(), match + sep->length());
3847 return true;
3848 }
3849 };
3850
3851 } /* anonymous namespace */
3852
3853 /* ES5 15.5.4.14 */
3854 bool
str_split(JSContext * cx,unsigned argc,Value * vp)3855 js::str_split(JSContext* cx, unsigned argc, Value* vp)
3856 {
3857 CallArgs args = CallArgsFromVp(argc, vp);
3858
3859 /* Steps 1-2. */
3860 RootedString str(cx, ThisToStringForStringProto(cx, args));
3861 if (!str)
3862 return false;
3863
3864 RootedObjectGroup group(cx, ObjectGroup::callingAllocationSiteGroup(cx, JSProto_Array));
3865 if (!group)
3866 return false;
3867
3868 /* Step 5: Use the second argument as the split limit, if given. */
3869 uint32_t limit;
3870 if (args.hasDefined(1)) {
3871 double d;
3872 if (!ToNumber(cx, args[1], &d))
3873 return false;
3874 limit = ToUint32(d);
3875 } else {
3876 limit = UINT32_MAX;
3877 }
3878
3879 /* Step 8. */
3880 RegExpGuard re(cx);
3881 RootedLinearString sepstr(cx);
3882 bool sepDefined = args.hasDefined(0);
3883 if (sepDefined) {
3884 ESClassValue cls;
3885 if (!GetClassOfValue(cx, args[0], &cls))
3886 return false;
3887
3888 if (cls == ESClass_RegExp) {
3889 RootedObject obj(cx, &args[0].toObject());
3890 if (!RegExpToShared(cx, obj, &re))
3891 return false;
3892 } else {
3893 sepstr = ArgToRootedString(cx, args, 0);
3894 if (!sepstr)
3895 return false;
3896 }
3897 }
3898
3899 /* Step 9. */
3900 if (limit == 0) {
3901 JSObject* aobj = NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
3902 if (!aobj)
3903 return false;
3904 args.rval().setObject(*aobj);
3905 return true;
3906 }
3907
3908 /* Step 10. */
3909 if (!sepDefined) {
3910 RootedValue v(cx, StringValue(str));
3911 JSObject* aobj = NewCopiedArrayTryUseGroup(cx, group, v.address(), 1);
3912 if (!aobj)
3913 return false;
3914 args.rval().setObject(*aobj);
3915 return true;
3916 }
3917 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3918 if (!linearStr)
3919 return false;
3920
3921 /* Steps 11-15. */
3922 RootedObject aobj(cx);
3923 if (!re.initialized()) {
3924 if (sepstr->length() == 0) {
3925 aobj = CharSplitHelper(cx, linearStr, limit, group);
3926 } else {
3927 SplitStringMatcher matcher(cx, sepstr);
3928 aobj = SplitHelper(cx, linearStr, limit, matcher, group);
3929 }
3930 } else {
3931 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3932 if (!res)
3933 return false;
3934 SplitRegExpMatcher matcher(*re, res);
3935 aobj = SplitHelper(cx, linearStr, limit, matcher, group);
3936 }
3937 if (!aobj)
3938 return false;
3939
3940 /* Step 16. */
3941 MOZ_ASSERT(aobj->group() == group);
3942 args.rval().setObject(*aobj);
3943 return true;
3944 }
3945
3946 JSObject*
str_split_string(JSContext * cx,HandleObjectGroup group,HandleString str,HandleString sep)3947 js::str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep)
3948 {
3949 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3950 if (!linearStr)
3951 return nullptr;
3952
3953 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3954 if (!linearSep)
3955 return nullptr;
3956
3957 uint32_t limit = UINT32_MAX;
3958
3959 if (linearSep->length() == 0)
3960 return CharSplitHelper(cx, linearStr, limit, group);
3961
3962 SplitStringMatcher matcher(cx, linearSep);
3963 return SplitHelper(cx, linearStr, limit, matcher, group);
3964 }
3965
3966 /*
3967 * Python-esque sequence operations.
3968 */
3969 static bool
str_concat(JSContext * cx,unsigned argc,Value * vp)3970 str_concat(JSContext* cx, unsigned argc, Value* vp)
3971 {
3972 CallArgs args = CallArgsFromVp(argc, vp);
3973 JSString* str = ThisToStringForStringProto(cx, args);
3974 if (!str)
3975 return false;
3976
3977 for (unsigned i = 0; i < args.length(); i++) {
3978 JSString* argStr = ToString<NoGC>(cx, args[i]);
3979 if (!argStr) {
3980 RootedString strRoot(cx, str);
3981 argStr = ToString<CanGC>(cx, args[i]);
3982 if (!argStr)
3983 return false;
3984 str = strRoot;
3985 }
3986
3987 JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
3988 if (next) {
3989 str = next;
3990 } else {
3991 RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3992 str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3993 if (!str)
3994 return false;
3995 }
3996 }
3997
3998 args.rval().setString(str);
3999 return true;
4000 }
4001
4002 static const JSFunctionSpec string_methods[] = {
4003 #if JS_HAS_TOSOURCE
4004 JS_FN(js_toSource_str, str_toSource, 0,0),
4005 #endif
4006
4007 /* Java-like methods. */
4008 JS_FN(js_toString_str, str_toString, 0,0),
4009 JS_FN(js_valueOf_str, str_toString, 0,0),
4010 JS_FN("toLowerCase", str_toLowerCase, 0,JSFUN_GENERIC_NATIVE),
4011 JS_FN("toUpperCase", str_toUpperCase, 0,JSFUN_GENERIC_NATIVE),
4012 JS_INLINABLE_FN("charAt", str_charAt, 1,JSFUN_GENERIC_NATIVE, StringCharAt),
4013 JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1,JSFUN_GENERIC_NATIVE, StringCharCodeAt),
4014 JS_SELF_HOSTED_FN("substring", "String_substring", 2,0),
4015 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
4016 JS_FN("includes", str_includes, 1,JSFUN_GENERIC_NATIVE),
4017 JS_FN("contains", str_contains, 1,JSFUN_GENERIC_NATIVE),
4018 JS_FN("indexOf", str_indexOf, 1,JSFUN_GENERIC_NATIVE),
4019 JS_FN("lastIndexOf", str_lastIndexOf, 1,JSFUN_GENERIC_NATIVE),
4020 JS_FN("startsWith", str_startsWith, 1,JSFUN_GENERIC_NATIVE),
4021 JS_FN("endsWith", str_endsWith, 1,JSFUN_GENERIC_NATIVE),
4022 JS_FN("trim", str_trim, 0,JSFUN_GENERIC_NATIVE),
4023 JS_FN("trimLeft", str_trimLeft, 0,JSFUN_GENERIC_NATIVE),
4024 JS_FN("trimRight", str_trimRight, 0,JSFUN_GENERIC_NATIVE),
4025 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
4026 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
4027 #if EXPOSE_INTL_API
4028 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
4029 #else
4030 JS_FN("localeCompare", str_localeCompare, 1,JSFUN_GENERIC_NATIVE),
4031 #endif
4032 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
4033 #if EXPOSE_INTL_API
4034 JS_FN("normalize", str_normalize, 0,JSFUN_GENERIC_NATIVE),
4035 #endif
4036
4037 /* Perl-ish methods (search is actually Python-esque). */
4038 JS_FN("match", str_match, 1,JSFUN_GENERIC_NATIVE),
4039 JS_FN("search", str_search, 1,JSFUN_GENERIC_NATIVE),
4040 JS_INLINABLE_FN("replace", str_replace, 2,JSFUN_GENERIC_NATIVE, StringReplace),
4041 JS_INLINABLE_FN("split", str_split, 2,JSFUN_GENERIC_NATIVE, StringSplit),
4042 JS_SELF_HOSTED_FN("substr", "String_substr", 2,0),
4043
4044 /* Python-esque sequence methods. */
4045 JS_FN("concat", str_concat, 1,JSFUN_GENERIC_NATIVE),
4046 JS_SELF_HOSTED_FN("slice", "String_slice", 2,0),
4047
4048 /* HTML string methods. */
4049 JS_SELF_HOSTED_FN("bold", "String_bold", 0,0),
4050 JS_SELF_HOSTED_FN("italics", "String_italics", 0,0),
4051 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0),
4052 JS_SELF_HOSTED_FN("strike", "String_strike", 0,0),
4053 JS_SELF_HOSTED_FN("small", "String_small", 0,0),
4054 JS_SELF_HOSTED_FN("big", "String_big", 0,0),
4055 JS_SELF_HOSTED_FN("blink", "String_blink", 0,0),
4056 JS_SELF_HOSTED_FN("sup", "String_sup", 0,0),
4057 JS_SELF_HOSTED_FN("sub", "String_sub", 0,0),
4058 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0),
4059 JS_SELF_HOSTED_FN("link", "String_link", 1,0),
4060 JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0),
4061 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0),
4062
4063 JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0,0),
4064 JS_FS_END
4065 };
4066
4067 // ES6 rev 27 (2014 Aug 24) 21.1.1
4068 bool
StringConstructor(JSContext * cx,unsigned argc,Value * vp)4069 js::StringConstructor(JSContext* cx, unsigned argc, Value* vp)
4070 {
4071 CallArgs args = CallArgsFromVp(argc, vp);
4072
4073 RootedString str(cx);
4074 if (args.length() > 0) {
4075 if (!args.isConstructing() && args[0].isSymbol())
4076 return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
4077
4078 str = ToString<CanGC>(cx, args[0]);
4079 if (!str)
4080 return false;
4081 } else {
4082 str = cx->runtime()->emptyString;
4083 }
4084
4085 if (args.isConstructing()) {
4086 RootedObject proto(cx);
4087 RootedObject newTarget(cx, &args.newTarget().toObject());
4088 if (!GetPrototypeFromConstructor(cx, newTarget, &proto))
4089 return false;
4090
4091 StringObject* strobj = StringObject::create(cx, str, proto);
4092 if (!strobj)
4093 return false;
4094 args.rval().setObject(*strobj);
4095 return true;
4096 }
4097
4098 args.rval().setString(str);
4099 return true;
4100 }
4101
4102 static bool
str_fromCharCode_few_args(JSContext * cx,const CallArgs & args)4103 str_fromCharCode_few_args(JSContext* cx, const CallArgs& args)
4104 {
4105 MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE);
4106
4107 char16_t chars[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
4108 for (unsigned i = 0; i < args.length(); i++) {
4109 uint16_t code;
4110 if (!ToUint16(cx, args[i], &code))
4111 return false;
4112 chars[i] = char16_t(code);
4113 }
4114 JSString* str = NewStringCopyN<CanGC>(cx, chars, args.length());
4115 if (!str)
4116 return false;
4117 args.rval().setString(str);
4118 return true;
4119 }
4120
4121 bool
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)4122 js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
4123 {
4124 CallArgs args = CallArgsFromVp(argc, vp);
4125
4126 MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
4127
4128 // Optimize the single-char case.
4129 if (args.length() == 1)
4130 return str_fromCharCode_one_arg(cx, args[0], args.rval());
4131
4132 // Optimize the case where the result will definitely fit in an inline
4133 // string (thin or fat) and so we don't need to malloc the chars. (We could
4134 // cover some cases where args.length() goes up to
4135 // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
4136 // all Latin1, but it doesn't seem worth the effort.)
4137 if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE)
4138 return str_fromCharCode_few_args(cx, args);
4139
4140 char16_t* chars = cx->pod_malloc<char16_t>(args.length() + 1);
4141 if (!chars)
4142 return false;
4143 for (unsigned i = 0; i < args.length(); i++) {
4144 uint16_t code;
4145 if (!ToUint16(cx, args[i], &code)) {
4146 js_free(chars);
4147 return false;
4148 }
4149 chars[i] = char16_t(code);
4150 }
4151 chars[args.length()] = 0;
4152 JSString* str = NewString<CanGC>(cx, chars, args.length());
4153 if (!str) {
4154 js_free(chars);
4155 return false;
4156 }
4157
4158 args.rval().setString(str);
4159 return true;
4160 }
4161
4162 bool
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)4163 js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval)
4164 {
4165 uint16_t ucode;
4166
4167 if (!ToUint16(cx, code, &ucode))
4168 return false;
4169
4170 if (StaticStrings::hasUnit(ucode)) {
4171 rval.setString(cx->staticStrings().getUnit(ucode));
4172 return true;
4173 }
4174
4175 char16_t c = char16_t(ucode);
4176 JSString* str = NewStringCopyN<CanGC>(cx, &c, 1);
4177 if (!str)
4178 return false;
4179
4180 rval.setString(str);
4181 return true;
4182 }
4183
4184 static const JSFunctionSpec string_static_methods[] = {
4185 JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0, StringFromCharCode),
4186
4187 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 1,0),
4188 JS_SELF_HOSTED_FN("raw", "String_static_raw", 2,0),
4189 JS_SELF_HOSTED_FN("substring", "String_static_substring", 3,0),
4190 JS_SELF_HOSTED_FN("substr", "String_static_substr", 3,0),
4191 JS_SELF_HOSTED_FN("slice", "String_static_slice", 3,0),
4192
4193 // This must be at the end because of bug 853075: functions listed after
4194 // self-hosted methods aren't available in self-hosted code.
4195 #if EXPOSE_INTL_API
4196 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
4197 #endif
4198 JS_FS_END
4199 };
4200
4201 /* static */ Shape*
assignInitialShape(ExclusiveContext * cx,Handle<StringObject * > obj)4202 StringObject::assignInitialShape(ExclusiveContext* cx, Handle<StringObject*> obj)
4203 {
4204 MOZ_ASSERT(obj->empty());
4205
4206 return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT,
4207 JSPROP_PERMANENT | JSPROP_READONLY);
4208 }
4209
4210 JSObject*
InitStringClass(JSContext * cx,HandleObject obj)4211 js::InitStringClass(JSContext* cx, HandleObject obj)
4212 {
4213 MOZ_ASSERT(obj->isNative());
4214
4215 Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>());
4216
4217 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
4218 RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_));
4219 if (!proto || !proto->as<StringObject>().init(cx, empty))
4220 return nullptr;
4221
4222 /* Now create the String function. */
4223 RootedFunction ctor(cx);
4224 ctor = global->createConstructor(cx, StringConstructor, cx->names().String, 1,
4225 AllocKind::FUNCTION, &jit::JitInfo_String);
4226 if (!ctor)
4227 return nullptr;
4228
4229 if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto))
4230 return nullptr;
4231
4232 if (!LinkConstructorAndPrototype(cx, ctor, proto))
4233 return nullptr;
4234
4235 if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
4236 !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods))
4237 {
4238 return nullptr;
4239 }
4240
4241 /*
4242 * Define escape/unescape, the URI encode/decode functions, and maybe
4243 * uneval on the global object.
4244 */
4245 if (!JS_DefineFunctions(cx, global, string_functions))
4246 return nullptr;
4247
4248 return proto;
4249 }
4250
4251 const char*
ValueToPrintable(JSContext * cx,const Value & vArg,JSAutoByteString * bytes,bool asSource)4252 js::ValueToPrintable(JSContext* cx, const Value& vArg, JSAutoByteString* bytes, bool asSource)
4253 {
4254 RootedValue v(cx, vArg);
4255 JSString* str;
4256 if (asSource)
4257 str = ValueToSource(cx, v);
4258 else
4259 str = ToString<CanGC>(cx, v);
4260 if (!str)
4261 return nullptr;
4262 str = QuoteString(cx, str, 0);
4263 if (!str)
4264 return nullptr;
4265 return bytes->encodeLatin1(cx, str);
4266 }
4267
4268 template <AllowGC allowGC>
4269 JSString*
ToStringSlow(ExclusiveContext * cx,typename MaybeRooted<Value,allowGC>::HandleType arg)4270 js::ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg)
4271 {
4272 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4273 MOZ_ASSERT(!arg.isString());
4274
4275 Value v = arg;
4276 if (!v.isPrimitive()) {
4277 if (!cx->shouldBeJSContext() || !allowGC)
4278 return nullptr;
4279 RootedValue v2(cx, v);
4280 if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2))
4281 return nullptr;
4282 v = v2;
4283 }
4284
4285 JSString* str;
4286 if (v.isString()) {
4287 str = v.toString();
4288 } else if (v.isInt32()) {
4289 str = Int32ToString<allowGC>(cx, v.toInt32());
4290 } else if (v.isDouble()) {
4291 str = NumberToString<allowGC>(cx, v.toDouble());
4292 } else if (v.isBoolean()) {
4293 str = BooleanToString(cx, v.toBoolean());
4294 } else if (v.isNull()) {
4295 str = cx->names().null;
4296 } else if (v.isSymbol()) {
4297 if (cx->shouldBeJSContext() && allowGC) {
4298 JS_ReportErrorNumber(cx->asJSContext(), GetErrorMessage, nullptr,
4299 JSMSG_SYMBOL_TO_STRING);
4300 }
4301 return nullptr;
4302 } else {
4303 MOZ_ASSERT(v.isUndefined());
4304 str = cx->names().undefined;
4305 }
4306 return str;
4307 }
4308
4309 template JSString*
4310 js::ToStringSlow<CanGC>(ExclusiveContext* cx, HandleValue arg);
4311
4312 template JSString*
4313 js::ToStringSlow<NoGC>(ExclusiveContext* cx, Value arg);
4314
JS_PUBLIC_API(JSString *)4315 JS_PUBLIC_API(JSString*)
4316 js::ToStringSlow(JSContext* cx, HandleValue v)
4317 {
4318 return ToStringSlow<CanGC>(cx, v);
4319 }
4320
4321 static JSString*
SymbolToSource(JSContext * cx,Symbol * symbol)4322 SymbolToSource(JSContext* cx, Symbol* symbol)
4323 {
4324 RootedString desc(cx, symbol->description());
4325 SymbolCode code = symbol->code();
4326 if (code != SymbolCode::InSymbolRegistry && code != SymbolCode::UniqueSymbol) {
4327 // Well-known symbol.
4328 MOZ_ASSERT(uint32_t(code) < JS::WellKnownSymbolLimit);
4329 return desc;
4330 }
4331
4332 StringBuffer buf(cx);
4333 if (code == SymbolCode::InSymbolRegistry ? !buf.append("Symbol.for(") : !buf.append("Symbol("))
4334 return nullptr;
4335 if (desc) {
4336 desc = StringToSource(cx, desc);
4337 if (!desc || !buf.append(desc))
4338 return nullptr;
4339 }
4340 if (!buf.append(')'))
4341 return nullptr;
4342 return buf.finishString();
4343 }
4344
4345 JSString*
ValueToSource(JSContext * cx,HandleValue v)4346 js::ValueToSource(JSContext* cx, HandleValue v)
4347 {
4348 JS_CHECK_RECURSION(cx, return nullptr);
4349 assertSameCompartment(cx, v);
4350
4351 if (v.isUndefined())
4352 return cx->names().void0;
4353 if (v.isString())
4354 return StringToSource(cx, v.toString());
4355 if (v.isSymbol())
4356 return SymbolToSource(cx, v.toSymbol());
4357 if (v.isPrimitive()) {
4358 /* Special case to preserve negative zero, _contra_ toString. */
4359 if (v.isDouble() && IsNegativeZero(v.toDouble())) {
4360 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4361 static const char16_t js_negzero_ucNstr[] = {'-', '0'};
4362
4363 return NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2);
4364 }
4365 return ToString<CanGC>(cx, v);
4366 }
4367
4368 RootedValue fval(cx);
4369 RootedObject obj(cx, &v.toObject());
4370 if (!GetProperty(cx, obj, obj, cx->names().toSource, &fval))
4371 return nullptr;
4372 if (IsCallable(fval)) {
4373 RootedValue rval(cx);
4374 if (!Invoke(cx, ObjectValue(*obj), fval, 0, nullptr, &rval))
4375 return nullptr;
4376 return ToString<CanGC>(cx, rval);
4377 }
4378
4379 return ObjectToSource(cx, obj);
4380 }
4381
4382 JSString*
StringToSource(JSContext * cx,JSString * str)4383 js::StringToSource(JSContext* cx, JSString* str)
4384 {
4385 return QuoteString(cx, str, '"');
4386 }
4387
4388 bool
EqualChars(JSLinearString * str1,JSLinearString * str2)4389 js::EqualChars(JSLinearString* str1, JSLinearString* str2)
4390 {
4391 MOZ_ASSERT(str1->length() == str2->length());
4392
4393 size_t len = str1->length();
4394
4395 AutoCheckCannotGC nogc;
4396 if (str1->hasTwoByteChars()) {
4397 if (str2->hasTwoByteChars())
4398 return PodEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len);
4399
4400 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
4401 }
4402
4403 if (str2->hasLatin1Chars())
4404 return PodEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
4405
4406 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
4407 }
4408
4409 bool
EqualStrings(JSContext * cx,JSString * str1,JSString * str2,bool * result)4410 js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result)
4411 {
4412 if (str1 == str2) {
4413 *result = true;
4414 return true;
4415 }
4416
4417 size_t length1 = str1->length();
4418 if (length1 != str2->length()) {
4419 *result = false;
4420 return true;
4421 }
4422
4423 JSLinearString* linear1 = str1->ensureLinear(cx);
4424 if (!linear1)
4425 return false;
4426 JSLinearString* linear2 = str2->ensureLinear(cx);
4427 if (!linear2)
4428 return false;
4429
4430 *result = EqualChars(linear1, linear2);
4431 return true;
4432 }
4433
4434 bool
EqualStrings(JSLinearString * str1,JSLinearString * str2)4435 js::EqualStrings(JSLinearString* str1, JSLinearString* str2)
4436 {
4437 if (str1 == str2)
4438 return true;
4439
4440 size_t length1 = str1->length();
4441 if (length1 != str2->length())
4442 return false;
4443
4444 return EqualChars(str1, str2);
4445 }
4446
4447 static int32_t
CompareStringsImpl(JSLinearString * str1,JSLinearString * str2)4448 CompareStringsImpl(JSLinearString* str1, JSLinearString* str2)
4449 {
4450 size_t len1 = str1->length();
4451 size_t len2 = str2->length();
4452
4453 AutoCheckCannotGC nogc;
4454 if (str1->hasLatin1Chars()) {
4455 const Latin1Char* chars1 = str1->latin1Chars(nogc);
4456 return str2->hasLatin1Chars()
4457 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4458 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4459 }
4460
4461 const char16_t* chars1 = str1->twoByteChars(nogc);
4462 return str2->hasLatin1Chars()
4463 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4464 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4465 }
4466
4467 int32_t
CompareChars(const char16_t * s1,size_t len1,JSLinearString * s2)4468 js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2)
4469 {
4470 AutoCheckCannotGC nogc;
4471 return s2->hasLatin1Chars()
4472 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
4473 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
4474 }
4475
4476 bool
CompareStrings(JSContext * cx,JSString * str1,JSString * str2,int32_t * result)4477 js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result)
4478 {
4479 MOZ_ASSERT(str1);
4480 MOZ_ASSERT(str2);
4481
4482 if (str1 == str2) {
4483 *result = 0;
4484 return true;
4485 }
4486
4487 JSLinearString* linear1 = str1->ensureLinear(cx);
4488 if (!linear1)
4489 return false;
4490
4491 JSLinearString* linear2 = str2->ensureLinear(cx);
4492 if (!linear2)
4493 return false;
4494
4495 *result = CompareStringsImpl(linear1, linear2);
4496 return true;
4497 }
4498
4499 int32_t
CompareAtoms(JSAtom * atom1,JSAtom * atom2)4500 js::CompareAtoms(JSAtom* atom1, JSAtom* atom2)
4501 {
4502 return CompareStringsImpl(atom1, atom2);
4503 }
4504
4505 bool
StringEqualsAscii(JSLinearString * str,const char * asciiBytes)4506 js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes)
4507 {
4508 size_t length = strlen(asciiBytes);
4509 #ifdef DEBUG
4510 for (size_t i = 0; i != length; ++i)
4511 MOZ_ASSERT(unsigned(asciiBytes[i]) <= 127);
4512 #endif
4513 if (length != str->length())
4514 return false;
4515
4516 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
4517
4518 AutoCheckCannotGC nogc;
4519 return str->hasLatin1Chars()
4520 ? PodEqual(latin1, str->latin1Chars(nogc), length)
4521 : EqualChars(latin1, str->twoByteChars(nogc), length);
4522 }
4523
4524 size_t
js_strlen(const char16_t * s)4525 js_strlen(const char16_t* s)
4526 {
4527 const char16_t* t;
4528
4529 for (t = s; *t != 0; t++)
4530 continue;
4531 return (size_t)(t - s);
4532 }
4533
4534 int32_t
js_strcmp(const char16_t * lhs,const char16_t * rhs)4535 js_strcmp(const char16_t* lhs, const char16_t* rhs)
4536 {
4537 while (true) {
4538 if (*lhs != *rhs)
4539 return int32_t(*lhs) - int32_t(*rhs);
4540 if (*lhs == 0)
4541 return 0;
4542 ++lhs, ++rhs;
4543 }
4544 }
4545
4546 UniquePtr<char[], JS::FreePolicy>
DuplicateString(js::ExclusiveContext * cx,const char * s)4547 js::DuplicateString(js::ExclusiveContext* cx, const char* s)
4548 {
4549 size_t n = strlen(s) + 1;
4550 auto ret = cx->make_pod_array<char>(n);
4551 if (!ret)
4552 return ret;
4553 PodCopy(ret.get(), s, n);
4554 return ret;
4555 }
4556
4557 UniquePtr<char16_t[], JS::FreePolicy>
DuplicateString(js::ExclusiveContext * cx,const char16_t * s)4558 js::DuplicateString(js::ExclusiveContext* cx, const char16_t* s)
4559 {
4560 size_t n = js_strlen(s) + 1;
4561 auto ret = cx->make_pod_array<char16_t>(n);
4562 if (!ret)
4563 return ret;
4564 PodCopy(ret.get(), s, n);
4565 return ret;
4566 }
4567
4568 UniquePtr<char16_t[], JS::FreePolicy>
DuplicateString(const char16_t * s)4569 js::DuplicateString(const char16_t* s)
4570 {
4571 size_t n = js_strlen(s) + 1;
4572 UniquePtr<char16_t[], JS::FreePolicy> ret(js_pod_malloc<char16_t>(n));
4573 if (!ret)
4574 return nullptr;
4575 PodCopy(ret.get(), s, n);
4576 return ret;
4577 }
4578
4579 template <typename CharT>
4580 const CharT*
js_strchr_limit(const CharT * s,char16_t c,const CharT * limit)4581 js_strchr_limit(const CharT* s, char16_t c, const CharT* limit)
4582 {
4583 while (s < limit) {
4584 if (*s == c)
4585 return s;
4586 s++;
4587 }
4588 return nullptr;
4589 }
4590
4591 template const Latin1Char*
4592 js_strchr_limit(const Latin1Char* s, char16_t c, const Latin1Char* limit);
4593
4594 template const char16_t*
4595 js_strchr_limit(const char16_t* s, char16_t c, const char16_t* limit);
4596
4597 char16_t*
InflateString(ExclusiveContext * cx,const char * bytes,size_t * lengthp)4598 js::InflateString(ExclusiveContext* cx, const char* bytes, size_t* lengthp)
4599 {
4600 size_t nchars;
4601 char16_t* chars;
4602 size_t nbytes = *lengthp;
4603
4604 nchars = nbytes;
4605 chars = cx->pod_malloc<char16_t>(nchars + 1);
4606 if (!chars)
4607 goto bad;
4608 for (size_t i = 0; i < nchars; i++)
4609 chars[i] = (unsigned char) bytes[i];
4610 *lengthp = nchars;
4611 chars[nchars] = 0;
4612 return chars;
4613
4614 bad:
4615 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4616 // on errors.
4617 *lengthp = 0;
4618 return nullptr;
4619 }
4620
4621 template <typename CharT>
4622 bool
DeflateStringToBuffer(JSContext * maybecx,const CharT * src,size_t srclen,char * dst,size_t * dstlenp)4623 js::DeflateStringToBuffer(JSContext* maybecx, const CharT* src, size_t srclen,
4624 char* dst, size_t* dstlenp)
4625 {
4626 size_t dstlen = *dstlenp;
4627 if (srclen > dstlen) {
4628 for (size_t i = 0; i < dstlen; i++)
4629 dst[i] = char(src[i]);
4630 if (maybecx) {
4631 AutoSuppressGC suppress(maybecx);
4632 JS_ReportErrorNumber(maybecx, GetErrorMessage, nullptr,
4633 JSMSG_BUFFER_TOO_SMALL);
4634 }
4635 return false;
4636 }
4637 for (size_t i = 0; i < srclen; i++)
4638 dst[i] = char(src[i]);
4639 *dstlenp = srclen;
4640 return true;
4641 }
4642
4643 template bool
4644 js::DeflateStringToBuffer(JSContext* maybecx, const Latin1Char* src, size_t srclen,
4645 char* dst, size_t* dstlenp);
4646
4647 template bool
4648 js::DeflateStringToBuffer(JSContext* maybecx, const char16_t* src, size_t srclen,
4649 char* dst, size_t* dstlenp);
4650
4651 #define ____ false
4652
4653 /*
4654 * Identifier start chars:
4655 * - 36: $
4656 * - 65..90: A..Z
4657 * - 95: _
4658 * - 97..122: a..z
4659 */
4660 const bool js_isidstart[] = {
4661 /* 0 1 2 3 4 5 6 7 8 9 */
4662 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4663 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4664 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4665 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4666 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4667 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4668 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4669 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4670 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4671 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4672 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4673 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4674 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4675 };
4676
4677 /*
4678 * Identifier chars:
4679 * - 36: $
4680 * - 48..57: 0..9
4681 * - 65..90: A..Z
4682 * - 95: _
4683 * - 97..122: a..z
4684 */
4685 const bool js_isident[] = {
4686 /* 0 1 2 3 4 5 6 7 8 9 */
4687 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4688 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4689 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4690 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4691 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4692 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4693 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4694 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4695 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4696 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4697 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4698 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4699 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4700 };
4701
4702 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4703 const bool js_isspace[] = {
4704 /* 0 1 2 3 4 5 6 7 8 9 */
4705 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
4706 /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
4707 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4708 /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
4709 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4710 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4711 /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4712 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4713 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4714 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4715 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4716 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4717 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4718 };
4719
4720 /*
4721 * Uri reserved chars + #:
4722 * - 35: #
4723 * - 36: $
4724 * - 38: &
4725 * - 43: +
4726 * - 44: ,
4727 * - 47: /
4728 * - 58: :
4729 * - 59: ;
4730 * - 61: =
4731 * - 63: ?
4732 * - 64: @
4733 */
4734 static const bool js_isUriReservedPlusPound[] = {
4735 /* 0 1 2 3 4 5 6 7 8 9 */
4736 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4737 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4738 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4739 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
4740 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
4741 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4742 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
4743 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4744 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4745 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4746 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4747 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4748 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4749 };
4750
4751 /*
4752 * Uri unescaped chars:
4753 * - 33: !
4754 * - 39: '
4755 * - 40: (
4756 * - 41: )
4757 * - 42: *
4758 * - 45: -
4759 * - 46: .
4760 * - 48..57: 0-9
4761 * - 65..90: A-Z
4762 * - 95: _
4763 * - 97..122: a-z
4764 * - 126: ~
4765 */
4766 static const bool js_isUriUnescaped[] = {
4767 /* 0 1 2 3 4 5 6 7 8 9 */
4768 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4769 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4770 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4771 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
4772 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
4773 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4774 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4775 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4776 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4777 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4778 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4779 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4780 /* 12 */ true, true, true, ____, ____, ____, true, ____
4781 };
4782
4783 #undef ____
4784
4785 #define URI_CHUNK 64U
4786
4787 static inline bool
TransferBufferToString(StringBuffer & sb,MutableHandleValue rval)4788 TransferBufferToString(StringBuffer& sb, MutableHandleValue rval)
4789 {
4790 JSString* str = sb.finishString();
4791 if (!str)
4792 return false;
4793 rval.setString(str);
4794 return true;
4795 }
4796
4797 /*
4798 * ECMA 3, 15.1.3 URI Handling Function Properties
4799 *
4800 * The following are implementations of the algorithms
4801 * given in the ECMA specification for the hidden functions
4802 * 'Encode' and 'Decode'.
4803 */
4804 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
4805
4806 template <typename CharT>
4807 static EncodeResult
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet,const bool * unescapedSet2)4808 Encode(StringBuffer& sb, const CharT* chars, size_t length,
4809 const bool* unescapedSet, const bool* unescapedSet2)
4810 {
4811 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4812
4813 char16_t hexBuf[4];
4814 hexBuf[0] = '%';
4815 hexBuf[3] = 0;
4816
4817 for (size_t k = 0; k < length; k++) {
4818 char16_t c = chars[k];
4819 if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
4820 if (!sb.append(c))
4821 return Encode_Failure;
4822 } else {
4823 if (c >= 0xDC00 && c <= 0xDFFF)
4824 return Encode_BadUri;
4825
4826 uint32_t v;
4827 if (c < 0xD800 || c > 0xDBFF) {
4828 v = c;
4829 } else {
4830 k++;
4831 if (k == length)
4832 return Encode_BadUri;
4833
4834 char16_t c2 = chars[k];
4835 if (c2 < 0xDC00 || c2 > 0xDFFF)
4836 return Encode_BadUri;
4837
4838 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4839 }
4840 uint8_t utf8buf[4];
4841 size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4842 for (size_t j = 0; j < L; j++) {
4843 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4844 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4845 if (!sb.append(hexBuf, 3))
4846 return Encode_Failure;
4847 }
4848 }
4849 }
4850
4851 return Encode_Success;
4852 }
4853
4854 static bool
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,const bool * unescapedSet2,MutableHandleValue rval)4855 Encode(JSContext* cx, HandleLinearString str, const bool* unescapedSet,
4856 const bool* unescapedSet2, MutableHandleValue rval)
4857 {
4858 size_t length = str->length();
4859 if (length == 0) {
4860 rval.setString(cx->runtime()->emptyString);
4861 return true;
4862 }
4863
4864 StringBuffer sb(cx);
4865 if (!sb.reserve(length))
4866 return false;
4867
4868 EncodeResult res;
4869 if (str->hasLatin1Chars()) {
4870 AutoCheckCannotGC nogc;
4871 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet, unescapedSet2);
4872 } else {
4873 AutoCheckCannotGC nogc;
4874 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet, unescapedSet2);
4875 }
4876
4877 if (res == Encode_Failure)
4878 return false;
4879
4880 if (res == Encode_BadUri) {
4881 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI, nullptr);
4882 return false;
4883 }
4884
4885 MOZ_ASSERT(res == Encode_Success);
4886 return TransferBufferToString(sb, rval);
4887 }
4888
4889 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4890
4891 template <typename CharT>
4892 static DecodeResult
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4893 Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reservedSet)
4894 {
4895 for (size_t k = 0; k < length; k++) {
4896 char16_t c = chars[k];
4897 if (c == '%') {
4898 size_t start = k;
4899 if ((k + 2) >= length)
4900 return Decode_BadUri;
4901
4902 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4903 return Decode_BadUri;
4904
4905 uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4906 k += 2;
4907 if (!(B & 0x80)) {
4908 c = char16_t(B);
4909 } else {
4910 int n = 1;
4911 while (B & (0x80 >> n))
4912 n++;
4913
4914 if (n == 1 || n > 4)
4915 return Decode_BadUri;
4916
4917 uint8_t octets[4];
4918 octets[0] = (uint8_t)B;
4919 if (k + 3 * (n - 1) >= length)
4920 return Decode_BadUri;
4921
4922 for (int j = 1; j < n; j++) {
4923 k++;
4924 if (chars[k] != '%')
4925 return Decode_BadUri;
4926
4927 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4928 return Decode_BadUri;
4929
4930 B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4931 if ((B & 0xC0) != 0x80)
4932 return Decode_BadUri;
4933
4934 k += 2;
4935 octets[j] = char(B);
4936 }
4937 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4938 if (v >= 0x10000) {
4939 v -= 0x10000;
4940 if (v > 0xFFFFF)
4941 return Decode_BadUri;
4942
4943 c = char16_t((v & 0x3FF) + 0xDC00);
4944 char16_t H = char16_t((v >> 10) + 0xD800);
4945 if (!sb.append(H))
4946 return Decode_Failure;
4947 } else {
4948 c = char16_t(v);
4949 }
4950 }
4951 if (c < 128 && reservedSet && reservedSet[c]) {
4952 if (!sb.append(chars + start, k - start + 1))
4953 return Decode_Failure;
4954 } else {
4955 if (!sb.append(c))
4956 return Decode_Failure;
4957 }
4958 } else {
4959 if (!sb.append(c))
4960 return Decode_Failure;
4961 }
4962 }
4963
4964 return Decode_Success;
4965 }
4966
4967 static bool
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4968 Decode(JSContext* cx, HandleLinearString str, const bool* reservedSet, MutableHandleValue rval)
4969 {
4970 size_t length = str->length();
4971 if (length == 0) {
4972 rval.setString(cx->runtime()->emptyString);
4973 return true;
4974 }
4975
4976 StringBuffer sb(cx);
4977
4978 DecodeResult res;
4979 if (str->hasLatin1Chars()) {
4980 AutoCheckCannotGC nogc;
4981 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4982 } else {
4983 AutoCheckCannotGC nogc;
4984 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4985 }
4986
4987 if (res == Decode_Failure)
4988 return false;
4989
4990 if (res == Decode_BadUri) {
4991 JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4992 return false;
4993 }
4994
4995 MOZ_ASSERT(res == Decode_Success);
4996 return TransferBufferToString(sb, rval);
4997 }
4998
4999 static bool
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)5000 str_decodeURI(JSContext* cx, unsigned argc, Value* vp)
5001 {
5002 CallArgs args = CallArgsFromVp(argc, vp);
5003 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5004 if (!str)
5005 return false;
5006
5007 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
5008 }
5009
5010 static bool
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)5011 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5012 {
5013 CallArgs args = CallArgsFromVp(argc, vp);
5014 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5015 if (!str)
5016 return false;
5017
5018 return Decode(cx, str, nullptr, args.rval());
5019 }
5020
5021 static bool
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)5022 str_encodeURI(JSContext* cx, unsigned argc, Value* vp)
5023 {
5024 CallArgs args = CallArgsFromVp(argc, vp);
5025 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5026 if (!str)
5027 return false;
5028
5029 return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval());
5030 }
5031
5032 static bool
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)5033 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5034 {
5035 CallArgs args = CallArgsFromVp(argc, vp);
5036 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5037 if (!str)
5038 return false;
5039
5040 return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval());
5041 }
5042
5043 /*
5044 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
5045 * least 4 bytes long. Return the number of UTF-8 bytes of data written.
5046 */
5047 uint32_t
OneUcs4ToUtf8Char(uint8_t * utf8Buffer,uint32_t ucs4Char)5048 js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
5049 {
5050 MOZ_ASSERT(ucs4Char <= 0x10FFFF);
5051
5052 if (ucs4Char < 0x80) {
5053 utf8Buffer[0] = uint8_t(ucs4Char);
5054 return 1;
5055 }
5056
5057 uint32_t a = ucs4Char >> 11;
5058 uint32_t utf8Length = 2;
5059 while (a) {
5060 a >>= 5;
5061 utf8Length++;
5062 }
5063
5064 MOZ_ASSERT(utf8Length <= 4);
5065
5066 uint32_t i = utf8Length;
5067 while (--i) {
5068 utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80);
5069 ucs4Char >>= 6;
5070 }
5071
5072 utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char);
5073 return utf8Length;
5074 }
5075
5076 size_t
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,JSLinearString * str,uint32_t quote)5077 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, JSLinearString* str,
5078 uint32_t quote)
5079 {
5080 size_t len = str->length();
5081 AutoCheckCannotGC nogc;
5082 return str->hasLatin1Chars()
5083 ? PutEscapedStringImpl(buffer, bufferSize, out, str->latin1Chars(nogc), len, quote)
5084 : PutEscapedStringImpl(buffer, bufferSize, out, str->twoByteChars(nogc), len, quote);
5085 }
5086
5087 template <typename CharT>
5088 size_t
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,const CharT * chars,size_t length,uint32_t quote)5089 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars,
5090 size_t length, uint32_t quote)
5091 {
5092 enum {
5093 STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
5094 } state;
5095
5096 MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
5097 MOZ_ASSERT_IF(!buffer, bufferSize == 0);
5098 MOZ_ASSERT_IF(out, !buffer);
5099
5100 if (bufferSize == 0)
5101 buffer = nullptr;
5102 else
5103 bufferSize--;
5104
5105 const CharT* charsEnd = chars + length;
5106 size_t n = 0;
5107 state = FIRST_QUOTE;
5108 unsigned shift = 0;
5109 unsigned hex = 0;
5110 unsigned u = 0;
5111 char c = 0; /* to quell GCC warnings */
5112
5113 for (;;) {
5114 switch (state) {
5115 case STOP:
5116 goto stop;
5117 case FIRST_QUOTE:
5118 state = CHARS;
5119 goto do_quote;
5120 case LAST_QUOTE:
5121 state = STOP;
5122 do_quote:
5123 if (quote == 0)
5124 continue;
5125 c = (char)quote;
5126 break;
5127 case CHARS:
5128 if (chars == charsEnd) {
5129 state = LAST_QUOTE;
5130 continue;
5131 }
5132 u = *chars++;
5133 if (u < ' ') {
5134 if (u != 0) {
5135 const char* escape = strchr(js_EscapeMap, (int)u);
5136 if (escape) {
5137 u = escape[1];
5138 goto do_escape;
5139 }
5140 }
5141 goto do_hex_escape;
5142 }
5143 if (u < 127) {
5144 if (u == quote || u == '\\')
5145 goto do_escape;
5146 c = (char)u;
5147 } else if (u < 0x100) {
5148 goto do_hex_escape;
5149 } else {
5150 shift = 16;
5151 hex = u;
5152 u = 'u';
5153 goto do_escape;
5154 }
5155 break;
5156 do_hex_escape:
5157 shift = 8;
5158 hex = u;
5159 u = 'x';
5160 do_escape:
5161 c = '\\';
5162 state = ESCAPE_START;
5163 break;
5164 case ESCAPE_START:
5165 MOZ_ASSERT(' ' <= u && u < 127);
5166 c = (char)u;
5167 state = ESCAPE_MORE;
5168 break;
5169 case ESCAPE_MORE:
5170 if (shift == 0) {
5171 state = CHARS;
5172 continue;
5173 }
5174 shift -= 4;
5175 u = 0xF & (hex >> shift);
5176 c = (char)(u + (u < 10 ? '0' : 'A' - 10));
5177 break;
5178 }
5179 if (buffer) {
5180 MOZ_ASSERT(n <= bufferSize);
5181 if (n != bufferSize) {
5182 buffer[n] = c;
5183 } else {
5184 buffer[n] = '\0';
5185 buffer = nullptr;
5186 }
5187 } else if (out) {
5188 if (out->put(&c, 1) < 0)
5189 return size_t(-1);
5190 }
5191 n++;
5192 }
5193 stop:
5194 if (buffer)
5195 buffer[n] = '\0';
5196 return n;
5197 }
5198
5199 template size_t
5200 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const Latin1Char* chars,
5201 size_t length, uint32_t quote);
5202
5203 template size_t
5204 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char* chars,
5205 size_t length, uint32_t quote);
5206
5207 template size_t
5208 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const char16_t* chars,
5209 size_t length, uint32_t quote);
5210
5211 template size_t
5212 js::PutEscapedString(char* buffer, size_t bufferSize, const Latin1Char* chars, size_t length,
5213 uint32_t quote);
5214
5215 template size_t
5216 js::PutEscapedString(char* buffer, size_t bufferSize, const char16_t* chars, size_t length,
5217 uint32_t quote);
5218