1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 2 * vim: set ts=8 sts=4 et sw=4 tw=99: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef jsstr_h 8 #define jsstr_h 9 10 #include "mozilla/HashFunctions.h" 11 #include "mozilla/PodOperations.h" 12 #include "mozilla/TextUtils.h" 13 14 #include <stdio.h> 15 16 #include "jsutil.h" 17 #include "NamespaceImports.h" 18 19 #include "gc/Rooting.h" 20 #include "js/RootingAPI.h" 21 #include "js/UniquePtr.h" 22 #include "vm/Printer.h" 23 #include "vm/Unicode.h" 24 25 class JSAutoByteString; 26 class JSLinearString; 27 28 namespace js { 29 30 class StringBuffer; 31 32 template <AllowGC allowGC> 33 extern JSString* 34 ConcatStrings(ExclusiveContext* cx, 35 typename MaybeRooted<JSString*, allowGC>::HandleType left, 36 typename MaybeRooted<JSString*, allowGC>::HandleType right); 37 38 // Return s advanced past any Unicode white space characters. 39 template <typename CharT> 40 static inline const CharT* 41 SkipSpace(const CharT* s, const CharT* end) 42 { 43 MOZ_ASSERT(s <= end); 44 45 while (s < end && unicode::IsSpace(*s)) 46 s++; 47 48 return s; 49 } 50 51 // Return less than, equal to, or greater than zero depending on whether 52 // s1 is less than, equal to, or greater than s2. 53 template <typename Char1, typename Char2> 54 inline int32_t 55 CompareChars(const Char1* s1, size_t len1, const Char2* s2, size_t len2) 56 { 57 size_t n = Min(len1, len2); 58 for (size_t i = 0; i < n; i++) { 59 if (int32_t cmp = s1[i] - s2[i]) 60 return cmp; 61 } 62 63 return int32_t(len1 - len2); 64 } 65 66 extern int32_t 67 CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2); 68 69 } /* namespace js */ 70 71 struct JSSubString { 72 JSLinearString* base; 73 size_t offset; 74 size_t length; 75 76 JSSubString() { mozilla::PodZero(this); } 77 78 void initEmpty(JSLinearString* base) { 79 this->base = base; 80 offset = length = 0; 81 } 82 void init(JSLinearString* base, size_t offset, size_t length) { 83 this->base = base; 84 this->offset = offset; 85 this->length = length; 86 } 87 }; 88 89 /* 90 * Shorthands for ASCII (7-bit) decimal and hex conversion. 91 * Manually inline isdigit for performance; MSVC doesn't do this for us. 92 */ 93 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9) 94 #define JS7_UNDEC(c) ((c) - '0') 95 #define JS7_ISOCT(c) ((((unsigned)(c)) - '0') <= 7) 96 #define JS7_UNOCT(c) (JS7_UNDEC(c)) 97 #define JS7_ISHEX(c) ((c) < 128 && isxdigit(c)) 98 #define JS7_UNHEX(c) (unsigned)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a') 99 #define JS7_ISLET(c) (mozilla::IsAsciiAlpha(c)) 100 101 extern size_t 102 js_strlen(const char16_t* s); 103 104 extern int32_t 105 js_strcmp(const char16_t* lhs, const char16_t* rhs); 106 107 template <typename CharT> 108 extern const CharT* 109 js_strchr_limit(const CharT* s, char16_t c, const CharT* limit); 110 111 static MOZ_ALWAYS_INLINE void 112 js_strncpy(char16_t* dst, const char16_t* src, size_t nelem) 113 { 114 return mozilla::PodCopy(dst, src, nelem); 115 } 116 117 extern int32_t 118 js_fputs(const char16_t* s, FILE* f); 119 120 namespace js { 121 122 /* Initialize the String class, returning its prototype object. */ 123 extern JSObject* 124 InitStringClass(JSContext* cx, HandleObject obj); 125 126 /* 127 * Convert a value to a printable C string. 128 */ 129 extern const char* 130 ValueToPrintable(JSContext* cx, const Value&, JSAutoByteString* bytes, bool asSource = false); 131 132 extern UniqueChars 133 DuplicateString(ExclusiveContext* cx, const char* s); 134 135 extern UniqueTwoByteChars 136 DuplicateString(ExclusiveContext* cx, const char16_t* s); 137 138 /* 139 * These variants do not report OOMs, you must arrange for OOMs to be reported 140 * yourself. 141 */ 142 extern UniqueChars 143 DuplicateString(const char* s); 144 145 extern UniqueChars 146 DuplicateString(const char* s, size_t n); 147 148 extern UniqueTwoByteChars 149 DuplicateString(const char16_t* s); 150 151 extern UniqueTwoByteChars 152 DuplicateString(const char16_t* s, size_t n); 153 154 /* 155 * Convert a non-string value to a string, returning null after reporting an 156 * error, otherwise returning a new string reference. 157 */ 158 template <AllowGC allowGC> 159 extern JSString* 160 ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg); 161 162 /* 163 * Convert the given value to a string. This method includes an inline 164 * fast-path for the case where the value is already a string; if the value is 165 * known not to be a string, use ToStringSlow instead. 166 */ 167 template <AllowGC allowGC> 168 static MOZ_ALWAYS_INLINE JSString* 169 ToString(JSContext* cx, JS::HandleValue v) 170 { 171 if (v.isString()) 172 return v.toString(); 173 return ToStringSlow<allowGC>(cx, v); 174 } 175 176 /* 177 * This function implements E-262-3 section 9.8, toString. Convert the given 178 * value to a string of characters appended to the given buffer. On error, the 179 * passed buffer may have partial results appended. 180 */ 181 inline bool 182 ValueToStringBuffer(JSContext* cx, const Value& v, StringBuffer& sb); 183 184 /* 185 * Convert a value to its source expression, returning null after reporting 186 * an error, otherwise returning a new string reference. 187 */ 188 extern JSString* 189 ValueToSource(JSContext* cx, HandleValue v); 190 191 /* 192 * Convert a JSString to its source expression; returns null after reporting an 193 * error, otherwise returns a new string reference. No Handle needed since the 194 * input is dead after the GC. 195 */ 196 extern JSString* 197 StringToSource(JSContext* cx, JSString* str); 198 199 /* 200 * Test if strings are equal. The caller can call the function even if str1 201 * or str2 are not GC-allocated things. 202 */ 203 extern bool 204 EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result); 205 206 /* Use the infallible method instead! */ 207 extern bool 208 EqualStrings(JSContext* cx, JSLinearString* str1, JSLinearString* str2, bool* result) = delete; 209 210 /* EqualStrings is infallible on linear strings. */ 211 extern bool 212 EqualStrings(JSLinearString* str1, JSLinearString* str2); 213 214 extern bool 215 EqualChars(JSLinearString* str1, JSLinearString* str2); 216 217 /* 218 * Return less than, equal to, or greater than zero depending on whether 219 * str1 is less than, equal to, or greater than str2. 220 */ 221 extern bool 222 CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result); 223 224 /* 225 * Same as CompareStrings but for atoms. Don't use this to just test 226 * for equality; use this when you need an ordering on atoms. 227 */ 228 extern int32_t 229 CompareAtoms(JSAtom* atom1, JSAtom* atom2); 230 231 /* 232 * Return true if the string matches the given sequence of ASCII bytes. 233 */ 234 extern bool 235 StringEqualsAscii(JSLinearString* str, const char* asciiBytes); 236 237 /* Return true if the string contains a pattern anywhere inside it. */ 238 extern bool 239 StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen); 240 241 extern int 242 StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start); 243 244 /* Return true if the string contains a pattern at |start|. */ 245 extern bool 246 HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start); 247 248 template <typename Char1, typename Char2> 249 inline bool 250 EqualChars(const Char1* s1, const Char2* s2, size_t len); 251 252 template <typename Char1> 253 inline bool 254 EqualChars(const Char1* s1, const Char1* s2, size_t len) 255 { 256 return mozilla::PodEqual(s1, s2, len); 257 } 258 259 template <typename Char1, typename Char2> 260 inline bool 261 EqualChars(const Char1* s1, const Char2* s2, size_t len) 262 { 263 for (const Char1* s1end = s1 + len; s1 < s1end; s1++, s2++) { 264 if (*s1 != *s2) 265 return false; 266 } 267 return true; 268 } 269 270 /* 271 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt). 272 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden 273 * and constitute API misuse. 274 */ 275 JSString* 276 SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt); 277 278 /* 279 * Inflate bytes in ASCII encoding to char16_t code units. Return null on error, 280 * otherwise return the char16_t buffer that was malloc'ed. length is updated to 281 * the length of the new string (in char16_t code units). A null char is 282 * appended, but it is not included in the length. 283 */ 284 extern char16_t* 285 InflateString(ExclusiveContext* cx, const char* bytes, size_t* length); 286 287 /* 288 * Inflate bytes to JS chars in an existing buffer. 'dst' must be large 289 * enough for 'srclen' char16_t code units. The buffer is NOT null-terminated. 290 */ 291 inline void 292 CopyAndInflateChars(char16_t* dst, const char* src, size_t srclen) 293 { 294 for (size_t i = 0; i < srclen; i++) 295 dst[i] = (unsigned char) src[i]; 296 } 297 298 inline void 299 CopyAndInflateChars(char16_t* dst, const JS::Latin1Char* src, size_t srclen) 300 { 301 for (size_t i = 0; i < srclen; i++) 302 dst[i] = src[i]; 303 } 304 305 /* 306 * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for 307 * 'length chars. The buffer is NOT null-terminated. The destination length 308 * must to be initialized with the buffer size and will contain on return the 309 * number of copied bytes. 310 */ 311 template <typename CharT> 312 extern bool 313 DeflateStringToBuffer(JSContext* maybecx, const CharT* chars, 314 size_t charsLength, char* bytes, size_t* length); 315 316 extern bool 317 str_fromCharCode(JSContext* cx, unsigned argc, Value* vp); 318 319 extern bool 320 str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval); 321 322 extern bool 323 str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp); 324 325 extern bool 326 str_fromCodePoint_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval); 327 328 /* String methods exposed so they can be installed in the self-hosting global. */ 329 330 extern bool 331 str_includes(JSContext* cx, unsigned argc, Value* vp); 332 333 extern bool 334 str_indexOf(JSContext* cx, unsigned argc, Value* vp); 335 336 extern bool 337 str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp); 338 339 extern bool 340 str_startsWith(JSContext* cx, unsigned argc, Value* vp); 341 342 extern bool 343 str_toLowerCase(JSContext* cx, unsigned argc, Value* vp); 344 345 extern bool 346 str_toUpperCase(JSContext* cx, unsigned argc, Value* vp); 347 348 extern bool 349 str_toString(JSContext* cx, unsigned argc, Value* vp); 350 351 extern bool 352 str_charAt(JSContext* cx, unsigned argc, Value* vp); 353 354 extern bool 355 str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res); 356 357 extern bool 358 str_charCodeAt(JSContext* cx, unsigned argc, Value* vp); 359 360 extern bool 361 str_contains(JSContext *cx, unsigned argc, Value *vp); 362 363 extern bool 364 str_endsWith(JSContext* cx, unsigned argc, Value* vp); 365 366 extern bool 367 str_trim(JSContext* cx, unsigned argc, Value* vp); 368 369 extern bool 370 str_trimLeft(JSContext* cx, unsigned argc, Value* vp); 371 372 extern bool 373 str_trimRight(JSContext* cx, unsigned argc, Value* vp); 374 375 extern bool 376 str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp); 377 378 extern bool 379 str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp); 380 381 #if !EXPOSE_INTL_API 382 extern bool 383 str_localeCompare(JSContext* cx, unsigned argc, Value* vp); 384 #else 385 extern bool 386 str_normalize(JSContext* cx, unsigned argc, Value* vp); 387 #endif 388 389 extern bool 390 str_concat(JSContext* cx, unsigned argc, Value* vp); 391 392 /* 393 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at 394 * least 4 bytes long. Return the number of UTF-8 bytes of data written. 395 */ 396 extern uint32_t 397 OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char); 398 399 extern size_t 400 PutEscapedStringImpl(char* buffer, size_t size, GenericPrinter* out, JSLinearString* str, 401 uint32_t quote); 402 403 template <typename CharT> 404 extern size_t 405 PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars, 406 size_t length, uint32_t quote); 407 408 /* 409 * Write str into buffer escaping any non-printable or non-ASCII character 410 * using \escapes for JS string literals. 411 * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns 412 * the length of the written output, NOT including the NUL. Thus, a return 413 * value of size or more means that the output was truncated. If buffer 414 * is null, just returns the length of the output. If quote is not 0, it must 415 * be a single or double quote character that will quote the output. 416 */ 417 inline size_t 418 PutEscapedString(char* buffer, size_t size, JSLinearString* str, uint32_t quote) 419 { 420 size_t n = PutEscapedStringImpl(buffer, size, nullptr, str, quote); 421 422 /* PutEscapedStringImpl can only fail with a file. */ 423 MOZ_ASSERT(n != size_t(-1)); 424 return n; 425 } 426 427 template <typename CharT> 428 inline size_t 429 PutEscapedString(char* buffer, size_t bufferSize, const CharT* chars, size_t length, uint32_t quote) 430 { 431 size_t n = PutEscapedStringImpl(buffer, bufferSize, nullptr, chars, length, quote); 432 433 /* PutEscapedStringImpl can only fail with a file. */ 434 MOZ_ASSERT(n != size_t(-1)); 435 return n; 436 } 437 438 inline bool 439 EscapedStringPrinter(GenericPrinter& out, JSLinearString* str, uint32_t quote) 440 { 441 return PutEscapedStringImpl(nullptr, 0, &out, str, quote) != size_t(-1); 442 } 443 444 inline bool 445 EscapedStringPrinter(GenericPrinter& out, const char* chars, size_t length, uint32_t quote) 446 { 447 return PutEscapedStringImpl(nullptr, 0, &out, chars, length, quote) != size_t(-1); 448 } 449 450 /* 451 * Write str into file escaping any non-printable or non-ASCII character. 452 * If quote is not 0, it must be a single or double quote character that 453 * will quote the output. 454 */ 455 inline bool 456 FileEscapedString(FILE* fp, JSLinearString* str, uint32_t quote) 457 { 458 Fprinter out(fp); 459 bool res = EscapedStringPrinter(out, str, quote); 460 out.finish(); 461 return res; 462 } 463 464 inline bool 465 FileEscapedString(FILE* fp, const char* chars, size_t length, uint32_t quote) 466 { 467 Fprinter out(fp); 468 bool res = EscapedStringPrinter(out, chars, length, quote); 469 out.finish(); 470 return res; 471 } 472 473 JSObject* 474 str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep, 475 uint32_t limit); 476 477 JSString * 478 str_flat_replace_string(JSContext *cx, HandleString string, HandleString pattern, 479 HandleString replacement); 480 481 JSString* 482 str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern, 483 HandleString replacement); 484 485 extern bool 486 StringConstructor(JSContext* cx, unsigned argc, Value* vp); 487 488 extern bool 489 FlatStringMatch(JSContext* cx, unsigned argc, Value* vp); 490 491 extern bool 492 FlatStringSearch(JSContext* cx, unsigned argc, Value* vp); 493 494 } /* namespace js */ 495 496 #endif /* jsstr_h */ 497