1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef jsstr_h
8 #define jsstr_h
9 
10 #include "mozilla/HashFunctions.h"
11 #include "mozilla/PodOperations.h"
12 #include "mozilla/TextUtils.h"
13 
14 #include <stdio.h>
15 
16 #include "jsutil.h"
17 #include "NamespaceImports.h"
18 
19 #include "gc/Rooting.h"
20 #include "js/RootingAPI.h"
21 #include "js/UniquePtr.h"
22 #include "vm/Printer.h"
23 #include "vm/Unicode.h"
24 
25 class JSAutoByteString;
26 class JSLinearString;
27 
28 namespace js {
29 
30 class StringBuffer;
31 
/*
 * Declared here, defined out of line. The handle types used for |left| and
 * |right| are selected by |allowGC| through MaybeRooted.
 *
 * NOTE(review): presumably returns the concatenation of |left| and |right|;
 * the failure behaviour (null return, error reporting) is not visible from
 * this declaration -- confirm against the definition.
 */
template <AllowGC allowGC>
extern JSString*
ConcatStrings(ExclusiveContext* cx,
              typename MaybeRooted<JSString*, allowGC>::HandleType left,
              typename MaybeRooted<JSString*, allowGC>::HandleType right);
37 
38 // Return s advanced past any Unicode white space characters.
39 template <typename CharT>
40 static inline const CharT*
41 SkipSpace(const CharT* s, const CharT* end)
42 {
43     MOZ_ASSERT(s <= end);
44 
45     while (s < end && unicode::IsSpace(*s))
46         s++;
47 
48     return s;
49 }
50 
// Compare two character sequences code unit by code unit.
//
// Returns a value less than, equal to, or greater than zero depending on
// whether s1 is less than, equal to, or greater than s2. The first differing
// code unit decides the result; when one sequence is a prefix of the other,
// the shorter sequence orders first. Only the sign of the result is
// meaningful.
template <typename Char1, typename Char2>
inline int32_t
CompareChars(const Char1* s1, size_t len1, const Char2* s2, size_t len2)
{
    // Walk the common prefix of both sequences.
    for (size_t i = 0; i < len1 && i < len2; i++) {
        if (int32_t cmp = s1[i] - s2[i])
            return cmp;
    }

    // Compare the lengths directly instead of narrowing |len1 - len2| to
    // int32_t: the size_t difference wraps and is then truncated, which can
    // produce zero or the wrong sign when the lengths are far apart.
    if (len1 == len2)
        return 0;
    return len1 < len2 ? -1 : 1;
}
65 
// Overload taking a char16_t range on the left and a linear string on the
// right. NOTE(review): presumably follows the same sign convention as the
// CompareChars template declared in this header; the definition is out of
// line and not visible here -- confirm.
extern int32_t
CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2);
68 
69 }  /* namespace js */
70 
71 struct JSSubString {
72     JSLinearString* base;
73     size_t          offset;
74     size_t          length;
75 
76     JSSubString() { mozilla::PodZero(this); }
77 
78     void initEmpty(JSLinearString* base) {
79         this->base = base;
80         offset = length = 0;
81     }
82     void init(JSLinearString* base, size_t offset, size_t length) {
83         this->base = base;
84         this->offset = offset;
85         this->length = length;
86     }
87 };
88 
/*
 * Shorthands for ASCII (7-bit) decimal, octal and hex classification and
 * conversion. Manually inline isdigit for performance; MSVC doesn't do this
 * for us.
 *
 * These are macros, so |c| may be evaluated more than once: do not pass an
 * expression with side effects.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISOCT(c)    ((((unsigned)(c)) - '0') <= 7)
#define JS7_UNOCT(c)    (JS7_UNDEC(c))
/*
 * The range check is performed on the unsigned value so that negative inputs
 * (for example a plain |char| holding a non-ASCII byte) are rejected before
 * they can reach isxdigit, whose behaviour is undefined for values that are
 * neither EOF nor representable as unsigned char.
 */
#define JS7_ISHEX(c)    ((unsigned)(c) < 128 && isxdigit(c))
/* Only meaningful when JS7_ISHEX(c) holds. */
#define JS7_UNHEX(c)    (unsigned)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a')
#define JS7_ISLET(c)    (mozilla::IsAsciiAlpha(c))
100 
/*
 * Helpers for raw character buffers, defined out of line.
 *
 * NOTE(review): the names mirror the C library's strlen/strcmp, so |s|,
 * |lhs| and |rhs| are presumably null-terminated -- confirm against the
 * definitions.
 */
extern size_t
js_strlen(const char16_t* s);

extern int32_t
js_strcmp(const char16_t* lhs, const char16_t* rhs);

/*
 * NOTE(review): presumably searches [s, limit) for |c| and returns a pointer
 * to the match -- the not-found result is not visible here; confirm.
 */
template <typename CharT>
extern const CharT*
js_strchr_limit(const CharT* s, char16_t c, const CharT* limit);
110 
111 static MOZ_ALWAYS_INLINE void
112 js_strncpy(char16_t* dst, const char16_t* src, size_t nelem)
113 {
114     return mozilla::PodCopy(dst, src, nelem);
115 }
116 
/*
 * NOTE(review): presumably the char16_t counterpart of fputs, writing |s| to
 * |f|; the meaning of the int32_t result is not visible here -- confirm.
 */
extern int32_t
js_fputs(const char16_t* s, FILE* f);
119 
120 namespace js {
121 
/* Initialize the String class, returning its prototype object. */
extern JSObject*
InitStringClass(JSContext* cx, HandleObject obj);

/*
 * Convert a value to a printable C string.
 *
 * NOTE(review): |bytes| presumably owns the storage behind the returned
 * pointer, and |asSource| presumably selects a source-like rendering; neither
 * is visible from this declaration -- confirm against the definition.
 */
extern const char*
ValueToPrintable(JSContext* cx, const Value&, JSAutoByteString* bytes, bool asSource = false);

/*
 * Context-taking DuplicateString overloads; the result is an owning
 * UniqueChars / UniqueTwoByteChars.
 *
 * NOTE(review): in contrast with the context-free variants declared further
 * down, these presumably report OOM on |cx| -- confirm.
 */
extern UniqueChars
DuplicateString(ExclusiveContext* cx, const char* s);

extern UniqueTwoByteChars
DuplicateString(ExclusiveContext* cx, const char16_t* s);

/*
 * These variants do not report OOMs; you must arrange for OOMs to be reported
 * yourself.
 *
 * NOTE(review): the (s, n) overloads presumably copy exactly |n| characters
 * rather than relying on a terminator -- confirm.
 */
extern UniqueChars
DuplicateString(const char* s);

extern UniqueChars
DuplicateString(const char* s, size_t n);

extern UniqueTwoByteChars
DuplicateString(const char16_t* s);

extern UniqueTwoByteChars
DuplicateString(const char16_t* s, size_t n);

/*
 * Convert a non-string value to a string, returning null after reporting an
 * error, otherwise returning a new string reference. The handle type of |arg|
 * is selected by |allowGC| through MaybeRooted.
 */
template <AllowGC allowGC>
extern JSString*
ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg);
161 
162 /*
163  * Convert the given value to a string.  This method includes an inline
164  * fast-path for the case where the value is already a string; if the value is
165  * known not to be a string, use ToStringSlow instead.
166  */
167 template <AllowGC allowGC>
168 static MOZ_ALWAYS_INLINE JSString*
169 ToString(JSContext* cx, JS::HandleValue v)
170 {
171     if (v.isString())
172         return v.toString();
173     return ToStringSlow<allowGC>(cx, v);
174 }
175 
/*
 * This function implements E-262-3 section 9.8, toString. Convert the given
 * value to a string of characters appended to the given buffer. On error, the
 * passed buffer may have partial results appended.
 *
 * Declared inline here without a body: the definition is provided elsewhere
 * and must be visible to any translation unit that calls this function.
 */
inline bool
ValueToStringBuffer(JSContext* cx, const Value& v, StringBuffer& sb);

/*
 * Convert a value to its source expression, returning null after reporting
 * an error, otherwise returning a new string reference.
 */
extern JSString*
ValueToSource(JSContext* cx, HandleValue v);

/*
 * Convert a JSString to its source expression; returns null after reporting an
 * error, otherwise returns a new string reference. No Handle needed since the
 * input is dead after the GC.
 */
extern JSString*
StringToSource(JSContext* cx, JSString* str);
198 
/*
 * Test if strings are equal. The caller can call the function even if str1
 * or str2 are not GC-allocated things.
 *
 * NOTE(review): the answer is presumably stored in |*result|, with the bool
 * return value signalling success or failure -- confirm against the
 * definition.
 */
extern bool
EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result);

/*
 * Use the infallible method instead! This overload is deleted so that calls
 * passing a context together with two linear strings fail to compile.
 */
extern bool
EqualStrings(JSContext* cx, JSLinearString* str1, JSLinearString* str2, bool* result) = delete;

/* EqualStrings is infallible on linear strings. */
extern bool
EqualStrings(JSLinearString* str1, JSLinearString* str2);

/*
 * NOTE(review): presumably compares only the characters of the two linear
 * strings; how this differs from EqualStrings is not visible here -- confirm.
 */
extern bool
EqualChars(JSLinearString* str1, JSLinearString* str2);

/*
 * Compare str1 and str2. The function itself returns bool; NOTE(review): the
 * ordering (less than, equal to, or greater than zero depending on whether
 * str1 is less than, equal to, or greater than str2) is presumably stored in
 * |*result|, with the return value signalling success -- confirm.
 */
extern bool
CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result);

/*
 * Same as CompareStrings but for atoms.  Don't use this to just test
 * for equality; use this when you need an ordering on atoms. Unlike
 * CompareStrings, the ordering is the return value.
 */
extern int32_t
CompareAtoms(JSAtom* atom1, JSAtom* atom2);

/*
 * Return true if the string matches the given sequence of ASCII bytes.
 */
extern bool
StringEqualsAscii(JSLinearString* str, const char* asciiBytes);

/* Return true if the string contains a pattern anywhere inside it. */
extern bool
StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);

/*
 * NOTE(review): presumably returns the index of the first occurrence of |pat|
 * in |text| at or after |start|; the not-found value is not visible here --
 * confirm against the definition.
 */
extern int
StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);

/* Return true if the string contains a pattern at |start|. */
extern bool
HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);

/*
 * Forward declaration of the mixed-type EqualChars template; both the
 * same-type overload and this template are defined further down in this
 * header.
 */
template <typename Char1, typename Char2>
inline bool
EqualChars(const Char1* s1, const Char2* s2, size_t len);
251 
252 template <typename Char1>
253 inline bool
254 EqualChars(const Char1* s1, const Char1* s2, size_t len)
255 {
256     return mozilla::PodEqual(s1, s2, len);
257 }
258 
/*
 * Mixed-type overload: compare the first |len| elements of |s1| and |s2| one
 * by one, returning false at the first mismatch. A |len| of zero compares
 * equal.
 */
template <typename Char1, typename Char2>
inline bool
EqualChars(const Char1* s1, const Char2* s2, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (s1[i] != s2[i])
            return false;
    }
    return true;
}
269 
/*
 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
 * and constitute API misuse.
 *
 * NOTE(review): the failure result (presumably null) is not stated by this
 * declaration -- confirm against the definition.
 */
JSString*
SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt);

/*
 * Inflate bytes in ASCII encoding to char16_t code units. Return null on error,
 * otherwise return the char16_t buffer that was malloc'ed. length is updated to
 * the length of the new string (in char16_t code units). A null char is
 * appended, but it is not included in the length.
 *
 * NOTE(review): the returned buffer is presumably owned by the caller; which
 * deallocation function must be used is not visible here -- confirm.
 */
extern char16_t*
InflateString(ExclusiveContext* cx, const char* bytes, size_t* length);
286 
/*
 * Widen |srclen| bytes from |src| into char16_t code units stored at |dst|.
 * 'dst' must have room for 'srclen' code units; no null terminator is
 * written.
 */
inline void
CopyAndInflateChars(char16_t* dst, const char* src, size_t srclen)
{
    const char* const srcEnd = src + srclen;
    while (src != srcEnd) {
        // Go through unsigned char so that bytes >= 0x80 are zero-extended
        // rather than sign-extended.
        *dst++ = static_cast<unsigned char>(*src++);
    }
}
297 
298 inline void
299 CopyAndInflateChars(char16_t* dst, const JS::Latin1Char* src, size_t srclen)
300 {
301     for (size_t i = 0; i < srclen; i++)
302         dst[i] = src[i];
303 }
304 
/*
 * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
 * 'length' chars. The buffer is NOT null-terminated. The destination length
 * must be initialized with the buffer size and will contain on return the
 * number of copied bytes.
 *
 * NOTE(review): the parameter name |maybecx| suggests the context may be
 * null; confirm against the definition.
 */
template <typename CharT>
extern bool
DeflateStringToBuffer(JSContext* maybecx, const CharT* chars,
                      size_t charsLength, char* bytes, size_t* length);

/*
 * Natives and single-argument helpers. NOTE(review): judging by the names,
 * these presumably implement String.fromCharCode and String.fromCodePoint;
 * the *_one_arg forms take the code as |code| and write to |rval| instead of
 * using the (argc, vp) native calling convention.
 */
extern bool
str_fromCharCode(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval);

extern bool
str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_fromCodePoint_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval);
327 
/*
 * String methods exposed so they can be installed in the self-hosting global.
 * Apart from str_charCodeAt_impl, all of them use the (cx, argc, vp) native
 * signature.
 */

extern bool
str_includes(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_indexOf(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_startsWith(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_toLowerCase(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_toUpperCase(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_toString(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_charAt(JSContext* cx, unsigned argc, Value* vp);

/*
 * Variant of str_charCodeAt taking the string and index directly and writing
 * to |res|, rather than using the (argc, vp) calling convention.
 */
extern bool
str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res);

extern bool
str_charCodeAt(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_contains(JSContext *cx, unsigned argc, Value *vp);

extern bool
str_endsWith(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_trim(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_trimLeft(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_trimRight(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp);

extern bool
str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp);

/*
 * Exactly one of the next two natives is declared: str_localeCompare when
 * EXPOSE_INTL_API is off, str_normalize when it is on.
 */
#if !EXPOSE_INTL_API
extern bool
str_localeCompare(JSContext* cx, unsigned argc, Value* vp);
#else
extern bool
str_normalize(JSContext* cx, unsigned argc, Value* vp);
#endif

extern bool
str_concat(JSContext* cx, unsigned argc, Value* vp);
391 
/*
 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
 * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
 */
extern uint32_t
OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char);

/*
 * Shared implementation behind the PutEscapedString and EscapedStringPrinter
 * wrappers defined in this header. Those wrappers call it in one of two ways:
 * with a character buffer and a null |out|, or with a null buffer, a size of
 * zero and a printer. They treat a return value of size_t(-1) as failure.
 */
extern size_t
PutEscapedStringImpl(char* buffer, size_t size, GenericPrinter* out, JSLinearString* str,
                     uint32_t quote);

/* Same as above, for an explicit (chars, length) range of CharT. */
template <typename CharT>
extern size_t
PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars,
                     size_t length, uint32_t quote);
407 
408 /*
409  * Write str into buffer escaping any non-printable or non-ASCII character
410  * using \escapes for JS string literals.
411  * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
412  * the length of the written output, NOT including the NUL. Thus, a return
413  * value of size or more means that the output was truncated. If buffer
414  * is null, just returns the length of the output. If quote is not 0, it must
415  * be a single or double quote character that will quote the output.
416 */
417 inline size_t
418 PutEscapedString(char* buffer, size_t size, JSLinearString* str, uint32_t quote)
419 {
420     size_t n = PutEscapedStringImpl(buffer, size, nullptr, str, quote);
421 
422     /* PutEscapedStringImpl can only fail with a file. */
423     MOZ_ASSERT(n != size_t(-1));
424     return n;
425 }
426 
427 template <typename CharT>
428 inline size_t
429 PutEscapedString(char* buffer, size_t bufferSize, const CharT* chars, size_t length, uint32_t quote)
430 {
431     size_t n = PutEscapedStringImpl(buffer, bufferSize, nullptr, chars, length, quote);
432 
433     /* PutEscapedStringImpl can only fail with a file. */
434     MOZ_ASSERT(n != size_t(-1));
435     return n;
436 }
437 
438 inline bool
439 EscapedStringPrinter(GenericPrinter& out, JSLinearString* str, uint32_t quote)
440 {
441     return PutEscapedStringImpl(nullptr, 0, &out, str, quote) != size_t(-1);
442 }
443 
444 inline bool
445 EscapedStringPrinter(GenericPrinter& out, const char* chars, size_t length, uint32_t quote)
446 {
447     return PutEscapedStringImpl(nullptr, 0, &out, chars, length, quote) != size_t(-1);
448 }
449 
450 /*
451  * Write str into file escaping any non-printable or non-ASCII character.
452  * If quote is not 0, it must be a single or double quote character that
453  * will quote the output.
454 */
455 inline bool
456 FileEscapedString(FILE* fp, JSLinearString* str, uint32_t quote)
457 {
458     Fprinter out(fp);
459     bool res = EscapedStringPrinter(out, str, quote);
460     out.finish();
461     return res;
462 }
463 
464 inline bool
465 FileEscapedString(FILE* fp, const char* chars, size_t length, uint32_t quote)
466 {
467     Fprinter out(fp);
468     bool res = EscapedStringPrinter(out, chars, length, quote);
469     out.finish();
470     return res;
471 }
472 
/*
 * String split/replace helpers taking rooted string handles, defined out of
 * line.
 *
 * NOTE(review): judging by the names, str_split_string presumably splits
 * |str| on |sep| producing at most |limit| pieces in an object of |group|,
 * and the replace helpers presumably replace |pattern| in |string| with
 * |replacement|; the difference between the "flat" and "raw" variants and
 * the failure results are not visible here -- confirm against the
 * definitions.
 */
JSObject*
str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep,
                 uint32_t limit);

JSString *
str_flat_replace_string(JSContext *cx, HandleString string, HandleString pattern,
                        HandleString replacement);

JSString*
str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
                       HandleString replacement);

/*
 * Natives using the (cx, argc, vp) signature. NOTE(review): StringConstructor
 * is presumably the String constructor itself; the exact contracts of
 * FlatStringMatch and FlatStringSearch are not visible here -- confirm.
 */
extern bool
StringConstructor(JSContext* cx, unsigned argc, Value* vp);

extern bool
FlatStringMatch(JSContext* cx, unsigned argc, Value* vp);

extern bool
FlatStringSearch(JSContext* cx, unsigned argc, Value* vp);
493 
494 } /* namespace js */
495 
496 #endif /* jsstr_h */
497